Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
31 files changed, 86840 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp new file mode 100644 index 0000000000000..e8950b58d42df --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -0,0 +1,20866 @@ +//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run +// both before and after the DAG is legalized. +// +// This pass is not a substitute for the LLVM IR instcombine pass. This pass is +// primarily intended to handle simplification opportunities that are implicit +// in the LLVM IR and exposed by the various codegen lowering phases. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IntervalMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/DAGCombine.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" +#include "llvm/Support/MachineValueType.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <functional> +#include <iterator> +#include <string> +#include <tuple> +#include <utility> + +using namespace llvm; + +#define DEBUG_TYPE "dagcombine" + +STATISTIC(NodesCombined , "Number of dag nodes combined"); +STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); +STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); +STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); +STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); +STATISTIC(SlicedLoads, "Number of load sliced"); +STATISTIC(NumFPLogicOpsConv, "Number of logic ops 
converted to fp ops"); + +static cl::opt<bool> +CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, + cl::desc("Enable DAG combiner's use of IR alias analysis")); + +static cl::opt<bool> +UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), + cl::desc("Enable DAG combiner's use of TBAA")); + +#ifndef NDEBUG +static cl::opt<std::string> +CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, + cl::desc("Only use DAG-combiner alias analysis in this" + " function")); +#endif + +/// Hidden option to stress test load slicing, i.e., when this option +/// is enabled, load slicing bypasses most of its profitability guards. +static cl::opt<bool> +StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, + cl::desc("Bypass the profitability model of load slicing"), + cl::init(false)); + +static cl::opt<bool> + MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), + cl::desc("DAG combiner may split indexing from loads")); + +static cl::opt<bool> + EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), + cl::desc("DAG combiner enable merging multiple stores " + "into a wider store")); + +static cl::opt<unsigned> TokenFactorInlineLimit( + "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), + cl::desc("Limit the number of operands to inline for Token Factors")); + +static cl::opt<unsigned> StoreMergeDependenceLimit( + "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), + cl::desc("Limit the number of times for the same StoreNode and RootNode " + "to bail out in store merging dependence check")); + +namespace { + + class DAGCombiner { + SelectionDAG &DAG; + const TargetLowering &TLI; + CombineLevel Level; + CodeGenOpt::Level OptLevel; + bool LegalOperations = false; + bool LegalTypes = false; + bool ForCodeSize; + + /// Worklist of all of the nodes that need to be simplified. + /// + /// This must behave as a stack -- new nodes to process are pushed onto the + /// back and when processing we pop off of the back. + /// + /// The worklist will not contain duplicates but may contain null entries + /// due to nodes being deleted from the underlying DAG. + SmallVector<SDNode *, 64> Worklist; + + /// Mapping from an SDNode to its position on the worklist. + /// + /// This is used to find and remove nodes from the worklist (by nulling + /// them) when they are deleted from the underlying DAG. It relies on + /// stable indices of nodes within the worklist. + DenseMap<SDNode *, unsigned> WorklistMap; + /// This records all nodes attempted to add to the worklist since we + /// considered a new worklist entry. As we keep do not add duplicate nodes + /// in the worklist, this is different from the tail of the worklist. + SmallSetVector<SDNode *, 32> PruningList; + + /// Set of nodes which have been combined (at least once). + /// + /// This is used to allow us to reliably add any operands of a DAG node + /// which have not yet been combined to the worklist. + SmallPtrSet<SDNode *, 32> CombinedNodes; + + /// Map from candidate StoreNode to the pair of RootNode and count. + /// The count is used to track how many times we have seen the StoreNode + /// with the same RootNode bail out in dependence check. If we have seen + /// the bail out for the same pair many times over a limit, we won't + /// consider the StoreNode with the same RootNode as store merging + /// candidate again. + DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap; + + // AA - Used for DAG load/store alias analysis. 
+ AliasAnalysis *AA; + + /// When an instruction is simplified, add all users of the instruction to + /// the work lists because they might get more simplified now. + void AddUsersToWorklist(SDNode *N) { + for (SDNode *Node : N->uses()) + AddToWorklist(Node); + } + + // Prune potentially dangling nodes. This is called after + // any visit to a node, but should also be called during a visit after any + // failed combine which may have created a DAG node. + void clearAddedDanglingWorklistEntries() { + // Check any nodes added to the worklist to see if they are prunable. + while (!PruningList.empty()) { + auto *N = PruningList.pop_back_val(); + if (N->use_empty()) + recursivelyDeleteUnusedNodes(N); + } + } + + SDNode *getNextWorklistEntry() { + // Before we do any work, remove nodes that are not in use. + clearAddedDanglingWorklistEntries(); + SDNode *N = nullptr; + // The Worklist holds the SDNodes in order, but it may contain null + // entries. + while (!N && !Worklist.empty()) { + N = Worklist.pop_back_val(); + } + + if (N) { + bool GoodWorklistEntry = WorklistMap.erase(N); + (void)GoodWorklistEntry; + assert(GoodWorklistEntry && + "Found a worklist entry without a corresponding map entry!"); + } + return N; + } + + /// Call the node-specific routine that folds each particular type of node. + SDValue visit(SDNode *N); + + public: + DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), + OptLevel(OL), AA(AA) { + ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); + + MaximumLegalStoreInBits = 0; + for (MVT VT : MVT::all_valuetypes()) + if (EVT(VT).isSimple() && VT != MVT::Other && + TLI.isTypeLegal(EVT(VT)) && + VT.getSizeInBits() >= MaximumLegalStoreInBits) + MaximumLegalStoreInBits = VT.getSizeInBits(); + } + + void ConsiderForPruning(SDNode *N) { + // Mark this for potential pruning. + PruningList.insert(N); + } + + /// Add to the worklist making sure its instance is at the back (next to be + /// processed.) + void AddToWorklist(SDNode *N) { + assert(N->getOpcode() != ISD::DELETED_NODE && + "Deleted Node added to Worklist"); + + // Skip handle nodes as they can't usefully be combined and confuse the + // zero-use deletion strategy. + if (N->getOpcode() == ISD::HANDLENODE) + return; + + ConsiderForPruning(N); + + if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second) + Worklist.push_back(N); + } + + /// Remove all instances of N from the worklist. + void removeFromWorklist(SDNode *N) { + CombinedNodes.erase(N); + PruningList.remove(N); + StoreRootCountMap.erase(N); + + auto It = WorklistMap.find(N); + if (It == WorklistMap.end()) + return; // Not in the worklist. + + // Null out the entry rather than erasing it to avoid a linear operation. + Worklist[It->second] = nullptr; + WorklistMap.erase(It); + } + + void deleteAndRecombine(SDNode *N); + bool recursivelyDeleteUnusedNodes(SDNode *N); + + /// Replaces all uses of the results of one DAG node with new values. + SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, + bool AddTo = true); + + /// Replaces all uses of the results of one DAG node with new values. + SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { + return CombineTo(N, &Res, 1, AddTo); + } + + /// Replaces all uses of the results of one DAG node with new values. 
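+    /// This overload takes two replacement values, for nodes that produce
+    /// two results, e.g. a value and a chain.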
+ SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, + bool AddTo = true) { + SDValue To[] = { Res0, Res1 }; + return CombineTo(N, To, 2, AddTo); + } + + void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); + + private: + unsigned MaximumLegalStoreInBits; + + /// Check the specified integer node value to see if it can be simplified or + /// if things it uses can be simplified by bit propagation. + /// If so, return true. + bool SimplifyDemandedBits(SDValue Op) { + unsigned BitWidth = Op.getScalarValueSizeInBits(); + APInt DemandedBits = APInt::getAllOnesValue(BitWidth); + return SimplifyDemandedBits(Op, DemandedBits); + } + + bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) { + EVT VT = Op.getValueType(); + unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1; + APInt DemandedElts = APInt::getAllOnesValue(NumElts); + return SimplifyDemandedBits(Op, DemandedBits, DemandedElts); + } + + /// Check the specified vector node value to see if it can be simplified or + /// if things it uses can be simplified as it only uses some of the + /// elements. If so, return true. + bool SimplifyDemandedVectorElts(SDValue Op) { + unsigned NumElts = Op.getValueType().getVectorNumElements(); + APInt DemandedElts = APInt::getAllOnesValue(NumElts); + return SimplifyDemandedVectorElts(Op, DemandedElts); + } + + bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, + const APInt &DemandedElts); + bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, + bool AssumeSingleUse = false); + + bool CombineToPreIndexedLoadStore(SDNode *N); + bool CombineToPostIndexedLoadStore(SDNode *N); + SDValue SplitIndexingFromLoad(LoadSDNode *LD); + bool SliceUpLoad(SDNode *N); + + // Scalars have size 0 to distinguish from singleton vectors. + SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD); + bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val); + bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val); + + /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed + /// load. + /// + /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. + /// \param InVecVT type of the input vector to EVE with bitcasts resolved. + /// \param EltNo index of the vector element to load. + /// \param OriginalLoad load that EVE came from to be replaced. + /// \returns EVE on success SDValue() on failure. + SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, + SDValue EltNo, + LoadSDNode *OriginalLoad); + void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); + SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); + SDValue SExtPromoteOperand(SDValue Op, EVT PVT); + SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); + SDValue PromoteIntBinOp(SDValue Op); + SDValue PromoteIntShiftOp(SDValue Op); + SDValue PromoteExtend(SDValue Op); + bool PromoteLoad(SDValue Op); + + /// Call the node-specific routine that knows how to fold each + /// particular type of node. If that doesn't do anything, try the + /// target-specific DAG combines. + SDValue combine(SDNode *N); + + // Visitation implementation - Implement dag node combining for different + // node types. The semantics are as follows: + // Return Value: + // SDValue.getNode() == 0 - No change was made + // SDValue.getNode() == N - N was replaced, is dead and has been handled. + // otherwise - N should be replaced by the returned Operand. 
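+    //   For example, a visit routine folding (add x, 0) just returns x, and
+    //   Run() then rewires every use of N to x; a routine that already called
+    //   CombineTo returns SDValue(N, 0) to signal the work is done.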
+ // + SDValue visitTokenFactor(SDNode *N); + SDValue visitMERGE_VALUES(SDNode *N); + SDValue visitADD(SDNode *N); + SDValue visitADDLike(SDNode *N); + SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference); + SDValue visitSUB(SDNode *N); + SDValue visitADDSAT(SDNode *N); + SDValue visitSUBSAT(SDNode *N); + SDValue visitADDC(SDNode *N); + SDValue visitADDO(SDNode *N); + SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N); + SDValue visitSUBC(SDNode *N); + SDValue visitSUBO(SDNode *N); + SDValue visitADDE(SDNode *N); + SDValue visitADDCARRY(SDNode *N); + SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N); + SDValue visitSUBE(SDNode *N); + SDValue visitSUBCARRY(SDNode *N); + SDValue visitMUL(SDNode *N); + SDValue visitMULFIX(SDNode *N); + SDValue useDivRem(SDNode *N); + SDValue visitSDIV(SDNode *N); + SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N); + SDValue visitUDIV(SDNode *N); + SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N); + SDValue visitREM(SDNode *N); + SDValue visitMULHU(SDNode *N); + SDValue visitMULHS(SDNode *N); + SDValue visitSMUL_LOHI(SDNode *N); + SDValue visitUMUL_LOHI(SDNode *N); + SDValue visitMULO(SDNode *N); + SDValue visitIMINMAX(SDNode *N); + SDValue visitAND(SDNode *N); + SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N); + SDValue visitOR(SDNode *N); + SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N); + SDValue visitXOR(SDNode *N); + SDValue SimplifyVBinOp(SDNode *N); + SDValue visitSHL(SDNode *N); + SDValue visitSRA(SDNode *N); + SDValue visitSRL(SDNode *N); + SDValue visitFunnelShift(SDNode *N); + SDValue visitRotate(SDNode *N); + SDValue visitABS(SDNode *N); + SDValue visitBSWAP(SDNode *N); + SDValue visitBITREVERSE(SDNode *N); + SDValue visitCTLZ(SDNode *N); + SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); + SDValue visitCTTZ(SDNode *N); + SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); + SDValue visitCTPOP(SDNode *N); + SDValue visitSELECT(SDNode *N); + SDValue visitVSELECT(SDNode *N); + SDValue visitSELECT_CC(SDNode *N); + SDValue visitSETCC(SDNode *N); + SDValue visitSETCCCARRY(SDNode *N); + SDValue visitSIGN_EXTEND(SDNode *N); + SDValue visitZERO_EXTEND(SDNode *N); + SDValue visitANY_EXTEND(SDNode *N); + SDValue visitAssertExt(SDNode *N); + SDValue visitSIGN_EXTEND_INREG(SDNode *N); + SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); + SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N); + SDValue visitTRUNCATE(SDNode *N); + SDValue visitBITCAST(SDNode *N); + SDValue visitBUILD_PAIR(SDNode *N); + SDValue visitFADD(SDNode *N); + SDValue visitFSUB(SDNode *N); + SDValue visitFMUL(SDNode *N); + SDValue visitFMA(SDNode *N); + SDValue visitFDIV(SDNode *N); + SDValue visitFREM(SDNode *N); + SDValue visitFSQRT(SDNode *N); + SDValue visitFCOPYSIGN(SDNode *N); + SDValue visitFPOW(SDNode *N); + SDValue visitSINT_TO_FP(SDNode *N); + SDValue visitUINT_TO_FP(SDNode *N); + SDValue visitFP_TO_SINT(SDNode *N); + SDValue visitFP_TO_UINT(SDNode *N); + SDValue visitFP_ROUND(SDNode *N); + SDValue visitFP_EXTEND(SDNode *N); + SDValue visitFNEG(SDNode *N); + SDValue visitFABS(SDNode *N); + SDValue visitFCEIL(SDNode *N); + SDValue visitFTRUNC(SDNode *N); + SDValue visitFFLOOR(SDNode *N); + SDValue visitFMINNUM(SDNode *N); + SDValue visitFMAXNUM(SDNode *N); + SDValue visitFMINIMUM(SDNode *N); + SDValue visitFMAXIMUM(SDNode *N); + SDValue visitBRCOND(SDNode *N); + SDValue visitBR_CC(SDNode *N); + SDValue visitLOAD(SDNode *N); + + SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain); + SDValue 
replaceStoreOfFPConstant(StoreSDNode *ST); + + SDValue visitSTORE(SDNode *N); + SDValue visitLIFETIME_END(SDNode *N); + SDValue visitINSERT_VECTOR_ELT(SDNode *N); + SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); + SDValue visitBUILD_VECTOR(SDNode *N); + SDValue visitCONCAT_VECTORS(SDNode *N); + SDValue visitEXTRACT_SUBVECTOR(SDNode *N); + SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitSCALAR_TO_VECTOR(SDNode *N); + SDValue visitINSERT_SUBVECTOR(SDNode *N); + SDValue visitMLOAD(SDNode *N); + SDValue visitMSTORE(SDNode *N); + SDValue visitMGATHER(SDNode *N); + SDValue visitMSCATTER(SDNode *N); + SDValue visitFP_TO_FP16(SDNode *N); + SDValue visitFP16_TO_FP(SDNode *N); + SDValue visitVECREDUCE(SDNode *N); + + SDValue visitFADDForFMACombine(SDNode *N); + SDValue visitFSUBForFMACombine(SDNode *N); + SDValue visitFMULForFMADistributiveCombine(SDNode *N); + + SDValue XformToShuffleWithZero(SDNode *N); + bool reassociationCanBreakAddressingModePattern(unsigned Opc, + const SDLoc &DL, SDValue N0, + SDValue N1); + SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0, + SDValue N1); + SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, + SDValue N1, SDNodeFlags Flags); + + SDValue visitShiftByConstant(SDNode *N); + + SDValue foldSelectOfConstants(SDNode *N); + SDValue foldVSelectOfConstants(SDNode *N); + SDValue foldBinOpIntoSelect(SDNode *BO); + bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); + SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N); + SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2); + SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, ISD::CondCode CC, + bool NotExtCompare = false); + SDValue convertSelectOfFPConstantsToLoadOffset( + const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, + ISD::CondCode CC); + SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, ISD::CondCode CC); + SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, + const SDLoc &DL); + SDValue unfoldMaskedMerge(SDNode *N); + SDValue unfoldExtremeBitClearingToShifts(SDNode *N); + SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, + const SDLoc &DL, bool foldBooleans); + SDValue rebuildSetCC(SDValue N); + + bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) const; + bool isOneUseSetCC(SDValue N) const; + bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y); + + SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, + unsigned HiOp); + SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); + SDValue CombineExtLoad(SDNode *N); + SDValue CombineZExtLogicopShiftLoad(SDNode *N); + SDValue combineRepeatedFPDivisors(SDNode *N); + SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex); + SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); + SDValue BuildSDIV(SDNode *N); + SDValue BuildSDIVPow2(SDNode *N); + SDValue BuildUDIV(SDNode *N); + SDValue BuildLogBase2(SDValue V, const SDLoc &DL); + SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); + SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); + SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); + SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); + SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, + SDNodeFlags Flags, bool Reciprocal); + SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, + SDNodeFlags Flags, bool 
Reciprocal); + SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, + bool DemandHighBits = true); + SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); + SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, + SDValue InnerPos, SDValue InnerNeg, + unsigned PosOpcode, unsigned NegOpcode, + const SDLoc &DL); + SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); + SDValue MatchLoadCombine(SDNode *N); + SDValue MatchStoreCombine(StoreSDNode *N); + SDValue ReduceLoadWidth(SDNode *N); + SDValue ReduceLoadOpStoreWidth(SDNode *N); + SDValue splitMergedValStore(StoreSDNode *ST); + SDValue TransformFPLoadStorePair(SDNode *N); + SDValue convertBuildVecZextToZext(SDNode *N); + SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); + SDValue reduceBuildVecToShuffle(SDNode *N); + SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, + ArrayRef<int> VectorMask, SDValue VecIn1, + SDValue VecIn2, unsigned LeftIdx, + bool DidSplitVec); + SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast); + + /// Walk up chain skipping non-aliasing memory nodes, + /// looking for aliasing nodes and adding them to the Aliases vector. + void GatherAllAliases(SDNode *N, SDValue OriginalChain, + SmallVectorImpl<SDValue> &Aliases); + + /// Return true if there is any possibility that the two addresses overlap. + bool isAlias(SDNode *Op0, SDNode *Op1) const; + + /// Walk up chain skipping non-aliasing memory nodes, looking for a better + /// chain (aliasing node.) + SDValue FindBetterChain(SDNode *N, SDValue Chain); + + /// Try to replace a store and any possibly adjacent stores on + /// consecutive chains with better chains. Return true only if St is + /// replaced. + /// + /// Notice that other chains may still be replaced even if the function + /// returns false. + bool findBetterNeighborChains(StoreSDNode *St); + + // Helper for findBetterNeighborChains. Walk up store chain add additional + // chained stores that do not overlap and can be parallelized. + bool parallelizeChainedStores(StoreSDNode *St); + + /// Holds a pointer to an LSBaseSDNode as well as information on where it + /// is located in a sequence of memory operations connected by a chain. + struct MemOpLink { + // Ptr to the mem node. + LSBaseSDNode *MemNode; + + // Offset from the base ptr. + int64_t OffsetFromBase; + + MemOpLink(LSBaseSDNode *N, int64_t Offset) + : MemNode(N), OffsetFromBase(Offset) {} + }; + + /// This is a helper function for visitMUL to check the profitability + /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). + /// MulNode is the original multiply, AddNode is (add x, c1), + /// and ConstNode is c2. + bool isMulAddWithConstProfitable(SDNode *MulNode, + SDValue &AddNode, + SDValue &ConstNode); + + /// This is a helper function for visitAND and visitZERO_EXTEND. Returns + /// true if the (and (load x) c) pattern matches an extload. ExtVT returns + /// the type of the loaded value to be extended. + bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, + EVT LoadResultTy, EVT &ExtVT); + + /// Helper function to calculate whether the given Load/Store can have its + /// width reduced to ExtVT. + bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType, + EVT &MemVT, unsigned ShAmt = 0); + + /// Used by BackwardsPropagateMask to find suitable loads. 
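+    /// The search recurses through the operands of \p N, gathering loads that
+    /// the mask can be folded into and failing on any node that cannot safely
+    /// absorb the mask.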
+ bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads, + SmallPtrSetImpl<SDNode*> &NodesWithConsts, + ConstantSDNode *Mask, SDNode *&NodeToMask); + /// Attempt to propagate a given AND node back to load leaves so that they + /// can be combined into narrow loads. + bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG); + + /// Helper function for MergeConsecutiveStores which merges the + /// component store chains. + SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, + unsigned NumStores); + + /// This is a helper function for MergeConsecutiveStores. When the + /// source elements of the consecutive stores are all constants or + /// all extracted vector elements, try to merge them into one + /// larger store introducing bitcasts if necessary. \return True + /// if a merged store was created. + bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, + EVT MemVT, unsigned NumStores, + bool IsConstantSrc, bool UseVector, + bool UseTrunc); + + /// This is a helper function for MergeConsecutiveStores. Stores + /// that potentially may be merged with St are placed in + /// StoreNodes. RootNode is a chain predecessor to all store + /// candidates. + void getStoreMergeCandidates(StoreSDNode *St, + SmallVectorImpl<MemOpLink> &StoreNodes, + SDNode *&Root); + + /// Helper function for MergeConsecutiveStores. Checks if + /// candidate stores have indirect dependency through their + /// operands. RootNode is the predecessor to all stores calculated + /// by getStoreMergeCandidates and is used to prune the dependency check. + /// \return True if safe to merge. + bool checkMergeStoreCandidatesForDependencies( + SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores, + SDNode *RootNode); + + /// Merge consecutive store operations into a wide store. + /// This optimization uses wide integers or vectors when possible. + /// \return number of stores that were merged into a merged store (the + /// affected nodes are stored as a prefix in \p StoreNodes). + bool MergeConsecutiveStores(StoreSDNode *St); + + /// Try to transform a truncation where C is a constant: + /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) + /// + /// \p N needs to be a truncation and its first operand an AND. Other + /// requirements are checked by the function (e.g. that trunc is + /// single-use) and if missed an empty SDValue is returned. + SDValue distributeTruncateThroughAnd(SDNode *N); + + /// Helper function to determine whether the target supports operation + /// given by \p Opcode for type \p VT, that is, whether the operation + /// is legal or custom before legalizing operations, and whether is + /// legal (but not custom) after legalization. + bool hasOperation(unsigned Opcode, EVT VT) { + if (LegalOperations) + return TLI.isOperationLegal(Opcode, VT); + return TLI.isOperationLegalOrCustom(Opcode, VT); + } + + public: + /// Runs the dag combiner on all nodes in the work list + void Run(CombineLevel AtLevel); + + SelectionDAG &getDAG() const { return DAG; } + + /// Returns a type large enough to hold any valid shift amount - before type + /// legalization these can be huge. + EVT getShiftAmountTy(EVT LHSTy) { + assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); + return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes); + } + + /// This method returns true if we are running before type legalization or + /// if the specified VT is legal. 
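+    /// Before type legalization, illegal types are permitted in the DAG
+    /// because the type legalizer will later convert them to legal types.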
+ bool isTypeLegal(const EVT &VT) { + if (!LegalTypes) return true; + return TLI.isTypeLegal(VT); + } + + /// Convenience wrapper around TargetLowering::getSetCCResultType + EVT getSetCCResultType(EVT VT) const { + return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + } + + void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, + SDValue OrigLoad, SDValue ExtLoad, + ISD::NodeType ExtType); + }; + +/// This class is a DAGUpdateListener that removes any deleted +/// nodes from the worklist. +class WorklistRemover : public SelectionDAG::DAGUpdateListener { + DAGCombiner &DC; + +public: + explicit WorklistRemover(DAGCombiner &dc) + : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} + + void NodeDeleted(SDNode *N, SDNode *E) override { + DC.removeFromWorklist(N); + } +}; + +class WorklistInserter : public SelectionDAG::DAGUpdateListener { + DAGCombiner &DC; + +public: + explicit WorklistInserter(DAGCombiner &dc) + : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} + + // FIXME: Ideally we could add N to the worklist, but this causes exponential + // compile time costs in large DAGs, e.g. Halide. + void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); } +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// TargetLowering::DAGCombinerInfo implementation +//===----------------------------------------------------------------------===// + +void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { + ((DAGCombiner*)DC)->AddToWorklist(N); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDValue Res, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); +} + +bool TargetLowering::DAGCombinerInfo:: +recursivelyDeleteUnusedNodes(SDNode *N) { + return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N); +} + +void TargetLowering::DAGCombinerInfo:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { + return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); +} + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +void DAGCombiner::deleteAndRecombine(SDNode *N) { + removeFromWorklist(N); + + // If the operands of this node are only used by the node, they will now be + // dead. Make sure to re-visit them and recursively delete dead nodes. + for (const SDValue &Op : N->ops()) + // For an operand generating multiple values, one of the values may + // become dead allowing further simplification (e.g. split index + // arithmetic from an indexed load). + if (Op->hasOneUse() || Op->getNumValues() > 1) + AddToWorklist(Op.getNode()); + + DAG.DeleteNode(N); +} + +// APInts must be the same size for most operations, this helper +// function zero extends the shorter of the pair so that they match. +// We provide an Offset so that we can create bitwidths that won't overflow. 
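+// For example, with an 8-bit LHS, a 16-bit RHS and Offset = 1, both values
+// are zero-extended to 17 bits, leaving headroom for a carry.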
+static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) { + unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth()); + LHS = LHS.zextOrSelf(Bits); + RHS = RHS.zextOrSelf(Bits); +} + +// Return true if this node is a setcc, or is a select_cc +// that selects between the target values used for true and false, making it +// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to +// the appropriate nodes based on the type of node we are checking. This +// simplifies life a bit for the callers. +bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) const { + if (N.getOpcode() == ISD::SETCC) { + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(2); + return true; + } + + if (N.getOpcode() != ISD::SELECT_CC || + !TLI.isConstTrueVal(N.getOperand(2).getNode()) || + !TLI.isConstFalseVal(N.getOperand(3).getNode())) + return false; + + if (TLI.getBooleanContents(N.getValueType()) == + TargetLowering::UndefinedBooleanContent) + return false; + + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(4); + return true; +} + +/// Return true if this is a SetCC-equivalent operation with only one use. +/// If this is true, it allows the users to invert the operation for free when +/// it is profitable to do so. +bool DAGCombiner::isOneUseSetCC(SDValue N) const { + SDValue N0, N1, N2; + if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) + return true; + return false; +} + +// Returns the SDNode if it is a constant float BuildVector +// or constant float. +static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { + if (isa<ConstantFPSDNode>(N)) + return N.getNode(); + if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) + return N.getNode(); + return nullptr; +} + +// Determines if it is a constant integer or a build vector of constant +// integers (and undefs). +// Do not permit build vector implicit truncation. +static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) { + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N)) + return !(Const->isOpaque() && NoOpaques); + if (N.getOpcode() != ISD::BUILD_VECTOR) + return false; + unsigned BitWidth = N.getScalarValueSizeInBits(); + for (const SDValue &Op : N->op_values()) { + if (Op.isUndef()) + continue; + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op); + if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth || + (Const->isOpaque() && NoOpaques)) + return false; + } + return true; +} + +// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with +// undef's. +static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) { + if (V.getOpcode() != ISD::BUILD_VECTOR) + return false; + return isConstantOrConstantVector(V, NoOpaques) || + ISD::isBuildVectorOfConstantFPSDNodes(V.getNode()); +} + +bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, + const SDLoc &DL, + SDValue N0, + SDValue N1) { + // Currently this only tries to ensure we don't undo the GEP splits done by + // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this, + // we check if the following transformation would be problematic: + // (load/store (add, (add, x, offset1), offset2)) -> + // (load/store (add, x, offset1+offset2)). 
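+  // For example, if CodeGenPrepare split an offset so that (x + offset1) fits
+  // the target's immediate addressing range, folding offset2 in as well could
+  // yield a combined offset that no longer encodes directly and so needs
+  // extra instructions to materialize.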
+ + if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD) + return false; + + if (N0.hasOneUse()) + return false; + + auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + auto *C2 = dyn_cast<ConstantSDNode>(N1); + if (!C1 || !C2) + return false; + + const APInt &C1APIntVal = C1->getAPIntValue(); + const APInt &C2APIntVal = C2->getAPIntValue(); + if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64) + return false; + + const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal; + if (CombinedValueIntVal.getBitWidth() > 64) + return false; + const int64_t CombinedValue = CombinedValueIntVal.getSExtValue(); + + for (SDNode *Node : N0->uses()) { + auto LoadStore = dyn_cast<MemSDNode>(Node); + if (LoadStore) { + // Is x[offset2] already not a legal addressing mode? If so then + // reassociating the constants breaks nothing (we test offset2 because + // that's the one we hope to fold into the load or store). + TargetLoweringBase::AddrMode AM; + AM.HasBaseReg = true; + AM.BaseOffs = C2APIntVal.getSExtValue(); + EVT VT = LoadStore->getMemoryVT(); + unsigned AS = LoadStore->getAddressSpace(); + Type *AccessTy = VT.getTypeForEVT(*DAG.getContext()); + if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) + continue; + + // Would x[offset1+offset2] still be a legal addressing mode? + AM.BaseOffs = CombinedValue; + if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) + return true; + } + } + + return false; +} + +// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression +// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc. +SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, + SDValue N0, SDValue N1) { + EVT VT = N0.getValueType(); + + if (N0.getOpcode() != Opc) + return SDValue(); + + // Don't reassociate reductions. + if (N0->getFlags().hasVectorReduction()) + return SDValue(); + + if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2)) + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + return SDValue(); + } + if (N0.hasOneUse()) { + // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1) + // iff (op x, c1) has one use + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); + if (!OpNode.getNode()) + return SDValue(); + return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); + } + } + return SDValue(); +} + +// Try to reassociate commutative binops. +SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, + SDValue N1, SDNodeFlags Flags) { + assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative."); + // Don't reassociate reductions. + if (Flags.hasVectorReduction()) + return SDValue(); + + // Floating-point reassociation is not allowed without loose FP math. 
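+  // Both the reassoc and nsz flags are required below, since reassociation
+  // can change intermediate rounding and which operation produces a signed
+  // zero.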
+ if (N0.getValueType().isFloatingPoint() || + N1.getValueType().isFloatingPoint()) + if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros()) + return SDValue(); + + if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1)) + return Combined; + if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0)) + return Combined; + return SDValue(); +} + +SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, + bool AddTo) { + assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); + ++NodesCombined; + LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: "; + To[0].getNode()->dump(&DAG); + dbgs() << " and " << NumTo - 1 << " other values\n"); + for (unsigned i = 0, e = NumTo; i != e; ++i) + assert((!To[i].getNode() || + N->getValueType(i) == To[i].getValueType()) && + "Cannot combine value to value of different type!"); + + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesWith(N, To); + if (AddTo) { + // Push the new nodes and any users onto the worklist + for (unsigned i = 0, e = NumTo; i != e; ++i) { + if (To[i].getNode()) { + AddToWorklist(To[i].getNode()); + AddUsersToWorklist(To[i].getNode()); + } + } + } + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (N->use_empty()) + deleteAndRecombine(N); + return SDValue(N, 0); +} + +void DAGCombiner:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { + // Replace all uses. If any nodes become isomorphic to other nodes and + // are deleted, make sure to remove them from our worklist. + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); + + // Push the new node and any (possibly new) users onto the worklist. + AddToWorklist(TLO.New.getNode()); + AddUsersToWorklist(TLO.New.getNode()); + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (TLO.Old.getNode()->use_empty()) + deleteAndRecombine(TLO.Old.getNode()); +} + +/// Check the specified integer node value to see if it can be simplified or if +/// things it uses can be simplified by bit propagation. If so, return true. +bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, + const APInt &DemandedElts) { + TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); + KnownBits Known; + if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO)) + return false; + + // Revisit the node. + AddToWorklist(Op.getNode()); + + // Replace the old value with the new one. + ++NodesCombined; + LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); + dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); + dbgs() << '\n'); + + CommitTargetLoweringOpt(TLO); + return true; +} + +/// Check the specified vector node value to see if it can be simplified or +/// if things it uses can be simplified as it only uses some of the elements. +/// If so, return true. +bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, + const APInt &DemandedElts, + bool AssumeSingleUse) { + TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); + APInt KnownUndef, KnownZero; + if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, + TLO, 0, AssumeSingleUse)) + return false; + + // Revisit the node. 
+ AddToWorklist(Op.getNode()); + + // Replace the old value with the new one. + ++NodesCombined; + LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); + dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); + dbgs() << '\n'); + + CommitTargetLoweringOpt(TLO); + return true; +} + +void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { + SDLoc DL(Load); + EVT VT = Load->getValueType(0); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0)); + + LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: "; + Trunc.getNode()->dump(&DAG); dbgs() << '\n'); + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); + deleteAndRecombine(Load); + AddToWorklist(Trunc.getNode()); +} + +SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { + Replace = false; + SDLoc DL(Op); + if (ISD::isUNINDEXEDLoad(Op.getNode())) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD + : LD->getExtensionType(); + Replace = true; + return DAG.getExtLoad(ExtType, DL, PVT, + LD->getChain(), LD->getBasePtr(), + MemVT, LD->getMemOperand()); + } + + unsigned Opc = Op.getOpcode(); + switch (Opc) { + default: break; + case ISD::AssertSext: + if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT)) + return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1)); + break; + case ISD::AssertZext: + if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT)) + return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1)); + break; + case ISD::Constant: { + unsigned ExtOpc = + Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + return DAG.getNode(ExtOpc, DL, PVT, Op); + } + } + + if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) + return SDValue(); + return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op); +} + +SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { + if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) + return SDValue(); + EVT OldVT = Op.getValueType(); + SDLoc DL(Op); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (!NewOp.getNode()) + return SDValue(); + AddToWorklist(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp, + DAG.getValueType(OldVT)); +} + +SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { + EVT OldVT = Op.getValueType(); + SDLoc DL(Op); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (!NewOp.getNode()) + return SDValue(); + AddToWorklist(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getZeroExtendInReg(NewOp, DL, OldVT); +} + +/// Promote the specified integer binary operation if the target indicates it is +/// beneficial. e.g. On x86, it's usually better to promote i16 operations to +/// i32 since i16 instructions are longer. +SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. 
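+  // On x86, for example, i16 arithmetic requires an extra operand-size
+  // prefix byte, so widening to i32 usually gives smaller and faster code.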
+ unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); + + bool Replace0 = false; + SDValue N0 = Op.getOperand(0); + SDValue NN0 = PromoteOperand(N0, PVT, Replace0); + + bool Replace1 = false; + SDValue N1 = Op.getOperand(1); + SDValue NN1 = PromoteOperand(N1, PVT, Replace1); + SDLoc DL(Op); + + SDValue RV = + DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1)); + + // We are always replacing N0/N1's use in N and only need + // additional replacements if there are additional uses. + Replace0 &= !N0->hasOneUse(); + Replace1 &= (N0 != N1) && !N1->hasOneUse(); + + // Combine Op here so it is preserved past replacements. + CombineTo(Op.getNode(), RV); + + // If operands have a use ordering, make sure we deal with + // predecessor first. + if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) { + std::swap(N0, N1); + std::swap(NN0, NN1); + } + + if (Replace0) { + AddToWorklist(NN0.getNode()); + ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); + } + if (Replace1) { + AddToWorklist(NN1.getNode()); + ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); + } + return Op; + } + return SDValue(); +} + +/// Promote the specified integer shift operation if the target indicates it is +/// beneficial. e.g. On x86, it's usually better to promote i16 operations to +/// i32 since i16 instructions are longer. +SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); + + bool Replace = false; + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + if (Opc == ISD::SRA) + N0 = SExtPromoteOperand(N0, PVT); + else if (Opc == ISD::SRL) + N0 = ZExtPromoteOperand(N0, PVT); + else + N0 = PromoteOperand(N0, PVT, Replace); + + if (!N0.getNode()) + return SDValue(); + + SDLoc DL(Op); + SDValue RV = + DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1)); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); + + // Deal with Op being deleted. + if (Op && Op.getOpcode() != ISD::DELETED_NODE) + return RV; + } + return SDValue(); +} + +SDValue DAGCombiner::PromoteExtend(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. 
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + // fold (aext (aext x)) -> (aext x) + // fold (aext (zext x)) -> (zext x) + // fold (aext (sext x)) -> (sext x) + LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); + } + return SDValue(); +} + +bool DAGCombiner::PromoteLoad(SDValue Op) { + if (!LegalOperations) + return false; + + if (!ISD::isUNINDEXEDLoad(Op.getNode())) + return false; + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return false; + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return false; + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + SDLoc DL(Op); + SDNode *N = Op.getNode(); + LoadSDNode *LD = cast<LoadSDNode>(N); + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD + : LD->getExtensionType(); + SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT, + LD->getChain(), LD->getBasePtr(), + MemVT, LD->getMemOperand()); + SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD); + + LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: "; + Result.getNode()->dump(&DAG); dbgs() << '\n'); + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); + deleteAndRecombine(N); + AddToWorklist(Result.getNode()); + return true; + } + return false; +} + +/// Recursively delete a node which has no uses and any operands for +/// which it is the only use. +/// +/// Note that this both deletes the nodes and removes them from the worklist. +/// It also adds any nodes who have had a user deleted to the worklist as they +/// may now have only one use and subject to other combines. +bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { + if (!N->use_empty()) + return false; + + SmallSetVector<SDNode *, 16> Nodes; + Nodes.insert(N); + do { + N = Nodes.pop_back_val(); + if (!N) + continue; + + if (N->use_empty()) { + for (const SDValue &ChildN : N->op_values()) + Nodes.insert(ChildN.getNode()); + + removeFromWorklist(N); + DAG.DeleteNode(N); + } else { + AddToWorklist(N); + } + } while (!Nodes.empty()); + return true; +} + +//===----------------------------------------------------------------------===// +// Main DAG Combiner implementation +//===----------------------------------------------------------------------===// + +void DAGCombiner::Run(CombineLevel AtLevel) { + // set the instance variables, so that the various visit routines may use it. + Level = AtLevel; + LegalOperations = Level >= AfterLegalizeVectorOps; + LegalTypes = Level >= AfterLegalizeTypes; + + WorklistInserter AddNodes(*this); + + // Add all the dag nodes to the worklist. + for (SDNode &Node : DAG.allnodes()) + AddToWorklist(&Node); + + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted, and tracking any + // changes of the root. + HandleSDNode Dummy(DAG.getRoot()); + + // While we have a valid worklist entry node, try to combine it. 
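+  // Combines may add new nodes (and re-add users of replaced nodes), so this
+  // loop runs until the worklist drains to a fixpoint.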
+ while (SDNode *N = getNextWorklistEntry()) { + // If N has no uses, it is dead. Make sure to revisit all N's operands once + // N is deleted from the DAG, since they too may now be dead or may have a + // reduced number of uses, allowing other xforms. + if (recursivelyDeleteUnusedNodes(N)) + continue; + + WorklistRemover DeadNodes(*this); + + // If this combine is running after legalizing the DAG, re-legalize any + // nodes pulled off the worklist. + if (Level == AfterLegalizeDAG) { + SmallSetVector<SDNode *, 16> UpdatedNodes; + bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); + + for (SDNode *LN : UpdatedNodes) { + AddUsersToWorklist(LN); + AddToWorklist(LN); + } + if (!NIsValid) + continue; + } + + LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); + + // Add any operands of the new node which have not yet been combined to the + // worklist as well. Because the worklist uniques things already, this + // won't repeatedly process the same operand. + CombinedNodes.insert(N); + for (const SDValue &ChildN : N->op_values()) + if (!CombinedNodes.count(ChildN.getNode())) + AddToWorklist(ChildN.getNode()); + + SDValue RV = combine(N); + + if (!RV.getNode()) + continue; + + ++NodesCombined; + + // If we get back the same node we passed in, rather than a new node or + // zero, we know that the node must have defined multiple values and + // CombineTo was used. Since CombineTo takes care of the worklist + // mechanics for us, we have no work to do in this case. + if (RV.getNode() == N) + continue; + + assert(N->getOpcode() != ISD::DELETED_NODE && + RV.getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned new node!"); + + LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG)); + + if (N->getNumValues() == RV.getNode()->getNumValues()) + DAG.ReplaceAllUsesWith(N, RV.getNode()); + else { + assert(N->getValueType(0) == RV.getValueType() && + N->getNumValues() == 1 && "Type mismatch"); + DAG.ReplaceAllUsesWith(N, &RV); + } + + // Push the new node and any users onto the worklist + AddToWorklist(RV.getNode()); + AddUsersToWorklist(RV.getNode()); + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. This will also take care of adding any + // operands which have lost a user to the worklist. + recursivelyDeleteUnusedNodes(N); + } + + // If the root changed (e.g. it was a dead load, update the root). 
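+  // The dummy handle node created above tracked every replacement of the
+  // root, so reading it back yields the final root value.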
+ DAG.setRoot(Dummy.getValue()); + DAG.RemoveDeadNodes(); +} + +SDValue DAGCombiner::visit(SDNode *N) { + switch (N->getOpcode()) { + default: break; + case ISD::TokenFactor: return visitTokenFactor(N); + case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); + case ISD::ADD: return visitADD(N); + case ISD::SUB: return visitSUB(N); + case ISD::SADDSAT: + case ISD::UADDSAT: return visitADDSAT(N); + case ISD::SSUBSAT: + case ISD::USUBSAT: return visitSUBSAT(N); + case ISD::ADDC: return visitADDC(N); + case ISD::SADDO: + case ISD::UADDO: return visitADDO(N); + case ISD::SUBC: return visitSUBC(N); + case ISD::SSUBO: + case ISD::USUBO: return visitSUBO(N); + case ISD::ADDE: return visitADDE(N); + case ISD::ADDCARRY: return visitADDCARRY(N); + case ISD::SUBE: return visitSUBE(N); + case ISD::SUBCARRY: return visitSUBCARRY(N); + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: return visitMULFIX(N); + case ISD::MUL: return visitMUL(N); + case ISD::SDIV: return visitSDIV(N); + case ISD::UDIV: return visitUDIV(N); + case ISD::SREM: + case ISD::UREM: return visitREM(N); + case ISD::MULHU: return visitMULHU(N); + case ISD::MULHS: return visitMULHS(N); + case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); + case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); + case ISD::SMULO: + case ISD::UMULO: return visitMULO(N); + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: return visitIMINMAX(N); + case ISD::AND: return visitAND(N); + case ISD::OR: return visitOR(N); + case ISD::XOR: return visitXOR(N); + case ISD::SHL: return visitSHL(N); + case ISD::SRA: return visitSRA(N); + case ISD::SRL: return visitSRL(N); + case ISD::ROTR: + case ISD::ROTL: return visitRotate(N); + case ISD::FSHL: + case ISD::FSHR: return visitFunnelShift(N); + case ISD::ABS: return visitABS(N); + case ISD::BSWAP: return visitBSWAP(N); + case ISD::BITREVERSE: return visitBITREVERSE(N); + case ISD::CTLZ: return visitCTLZ(N); + case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); + case ISD::CTTZ: return visitCTTZ(N); + case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); + case ISD::CTPOP: return visitCTPOP(N); + case ISD::SELECT: return visitSELECT(N); + case ISD::VSELECT: return visitVSELECT(N); + case ISD::SELECT_CC: return visitSELECT_CC(N); + case ISD::SETCC: return visitSETCC(N); + case ISD::SETCCCARRY: return visitSETCCCARRY(N); + case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); + case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); + case ISD::ANY_EXTEND: return visitANY_EXTEND(N); + case ISD::AssertSext: + case ISD::AssertZext: return visitAssertExt(N); + case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); + case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); + case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N); + case ISD::TRUNCATE: return visitTRUNCATE(N); + case ISD::BITCAST: return visitBITCAST(N); + case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); + case ISD::FADD: return visitFADD(N); + case ISD::FSUB: return visitFSUB(N); + case ISD::FMUL: return visitFMUL(N); + case ISD::FMA: return visitFMA(N); + case ISD::FDIV: return visitFDIV(N); + case ISD::FREM: return visitFREM(N); + case ISD::FSQRT: return visitFSQRT(N); + case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); + case ISD::FPOW: return visitFPOW(N); + case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); + case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); + case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); + case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); + 
case ISD::FP_ROUND: return visitFP_ROUND(N); + case ISD::FP_EXTEND: return visitFP_EXTEND(N); + case ISD::FNEG: return visitFNEG(N); + case ISD::FABS: return visitFABS(N); + case ISD::FFLOOR: return visitFFLOOR(N); + case ISD::FMINNUM: return visitFMINNUM(N); + case ISD::FMAXNUM: return visitFMAXNUM(N); + case ISD::FMINIMUM: return visitFMINIMUM(N); + case ISD::FMAXIMUM: return visitFMAXIMUM(N); + case ISD::FCEIL: return visitFCEIL(N); + case ISD::FTRUNC: return visitFTRUNC(N); + case ISD::BRCOND: return visitBRCOND(N); + case ISD::BR_CC: return visitBR_CC(N); + case ISD::LOAD: return visitLOAD(N); + case ISD::STORE: return visitSTORE(N); + case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); + case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); + case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); + case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); + case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); + case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); + case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); + case ISD::MGATHER: return visitMGATHER(N); + case ISD::MLOAD: return visitMLOAD(N); + case ISD::MSCATTER: return visitMSCATTER(N); + case ISD::MSTORE: return visitMSTORE(N); + case ISD::LIFETIME_END: return visitLIFETIME_END(N); + case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); + case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N); + } + return SDValue(); +} + +SDValue DAGCombiner::combine(SDNode *N) { + SDValue RV = visit(N); + + // If nothing happened, try a target-specific DAG combine. + if (!RV.getNode()) { + assert(N->getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned NULL!"); + + if (N->getOpcode() >= ISD::BUILTIN_OP_END || + TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { + + // Expose the DAG combiner to the target combiner impls. + TargetLowering::DAGCombinerInfo + DagCombineInfo(DAG, Level, false, this); + + RV = TLI.PerformDAGCombine(N, DagCombineInfo); + } + } + + // If nothing happened still, try promoting the operation. + if (!RV.getNode()) { + switch (N->getOpcode()) { + default: break; + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + RV = PromoteIntBinOp(SDValue(N, 0)); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + RV = PromoteIntShiftOp(SDValue(N, 0)); + break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + RV = PromoteExtend(SDValue(N, 0)); + break; + case ISD::LOAD: + if (PromoteLoad(SDValue(N, 0))) + RV = SDValue(N, 0); + break; + } + } + + // If N is a commutative binary node, try to eliminate it if the commuted + // version is already present in the DAG. + if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) && + N->getNumValues() == 1) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Constant operands are canonicalized to RHS. 
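+    // For example, if both (add x, y) and (add y, x) are present, the
+    // commuted lookup below finds the existing node and N can be CSE'd away.
+    // The guard skips the one case where N already has its constant on the
+    // RHS, since a canonicalized DAG should not contain the commuted form
+    // with a constant LHS.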
+    if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
+      SDValue Ops[] = {N1, N0};
+      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
+                                            N->getFlags());
+      if (CSENode)
+        return SDValue(CSENode, 0);
+    }
+  }
+
+  return RV;
+}
+
+/// Given a node, return its input chain if it has one, otherwise return a null
+/// sd operand.
+static SDValue getInputChainForNode(SDNode *N) {
+  if (unsigned NumOps = N->getNumOperands()) {
+    if (N->getOperand(0).getValueType() == MVT::Other)
+      return N->getOperand(0);
+    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+      return N->getOperand(NumOps-1);
+    for (unsigned i = 1; i < NumOps-1; ++i)
+      if (N->getOperand(i).getValueType() == MVT::Other)
+        return N->getOperand(i);
+  }
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
+  // If N has two operands, where one has an input chain equal to the other,
+  // the 'other' chain is redundant.
+  if (N->getNumOperands() == 2) {
+    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
+      return N->getOperand(0);
+    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
+      return N->getOperand(1);
+  }
+
+  // Don't simplify token factors if optnone.
+  if (OptLevel == CodeGenOpt::None)
+    return SDValue();
+
+  // If the sole user is a token factor, we should make sure we have a
+  // chance to merge them together. This prevents TF chains from inhibiting
+  // optimizations.
+  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
+    AddToWorklist(*(N->use_begin()));
+
+  SmallVector<SDNode *, 8> TFs;   // List of token factors to visit.
+  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
+  SmallPtrSet<SDNode*, 16> SeenOps;
+  bool Changed = false;           // If we should replace this token factor.
+
+  // Start out with this token factor.
+  TFs.push_back(N);
+
+  // Iterate through token factors. The TFs list grows when new token factors
+  // are encountered.
+  for (unsigned i = 0; i < TFs.size(); ++i) {
+    // Limit number of nodes to inline, to avoid quadratic compile times.
+    // We have to add the outstanding Token Factors to Ops, otherwise we might
+    // drop Ops from the resulting Token Factors.
+    if (Ops.size() > TokenFactorInlineLimit) {
+      for (unsigned j = i; j < TFs.size(); j++)
+        Ops.emplace_back(TFs[j], 0);
+      // Drop unprocessed Token Factors from TFs, so we do not add them to the
+      // combiner worklist later.
+      TFs.resize(i);
+      break;
+    }
+
+    SDNode *TF = TFs[i];
+    // Check each of the operands.
+    for (const SDValue &Op : TF->op_values()) {
+      switch (Op.getOpcode()) {
+      case ISD::EntryToken:
+        // Entry tokens don't need to be added to the list. They are
+        // redundant.
+        Changed = true;
+        break;
+
+      case ISD::TokenFactor:
+        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
+          // Queue up for processing.
+          TFs.push_back(Op.getNode());
+          Changed = true;
+          break;
+        }
+        LLVM_FALLTHROUGH;
+
+      default:
+        // Only add if it isn't already in the list.
+        if (SeenOps.insert(Op.getNode()).second)
+          Ops.push_back(Op);
+        else
+          Changed = true;
+        break;
+      }
+    }
+  }
+
+  // Re-visit inlined Token Factors, to clean them up in case they have been
+  // removed. Skip the first Token Factor, as this is the current node.
+  for (unsigned i = 1, e = TFs.size(); i < e; i++)
+    AddToWorklist(TFs[i]);
+
+  // Remove Nodes that are chained to another node in the list. Do so
+  // by walking up chains breadth-first, stopping when we've seen
+  // another operand.
+  // In general we must climb to the EntryNode, but we can exit early if we
+  // find all remaining work is associated with just one operand as no
+  // further pruning is possible.
+
+  // List of nodes to search through and original Ops from which they originate.
+  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
+  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
+  SmallPtrSet<SDNode *, 16> SeenChains;
+  bool DidPruneOps = false;
+
+  unsigned NumLeftToConsider = 0;
+  for (const SDValue &Op : Ops) {
+    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
+    OpWorkCount.push_back(1);
+  }
+
+  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
+    // If this is an Op, we can remove the op from the list. Re-mark any
+    // search associated with it as coming from the current OpNumber.
+    if (SeenOps.count(Op) != 0) {
+      Changed = true;
+      DidPruneOps = true;
+      unsigned OrigOpNumber = 0;
+      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
+        OrigOpNumber++;
+      assert((OrigOpNumber != Ops.size()) &&
+             "expected to find TokenFactor Operand");
+      // Re-mark worklist from OrigOpNumber to OpNumber
+      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
+        if (Worklist[i].second == OrigOpNumber) {
+          Worklist[i].second = OpNumber;
+        }
+      }
+      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
+      OpWorkCount[OrigOpNumber] = 0;
+      NumLeftToConsider--;
+    }
+    // Add if it's a new chain.
+    if (SeenChains.insert(Op).second) {
+      OpWorkCount[OpNumber]++;
+      Worklist.push_back(std::make_pair(Op, OpNumber));
+    }
+  };
+
+  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
+    // We need to consider at least 2 Ops to prune.
+    if (NumLeftToConsider <= 1)
+      break;
+    auto CurNode = Worklist[i].first;
+    auto CurOpNumber = Worklist[i].second;
+    assert((OpWorkCount[CurOpNumber] > 0) &&
+           "Node should not appear in worklist");
+    switch (CurNode->getOpcode()) {
+    case ISD::EntryToken:
+      // Hitting EntryToken is the only way for the search to terminate
+      // without hitting another operand's search. Prevent us from marking
+      // this operand considered.
+      NumLeftToConsider++;
+      break;
+    case ISD::TokenFactor:
+      for (const SDValue &Op : CurNode->op_values())
+        AddToWorklist(i, Op.getNode(), CurOpNumber);
+      break;
+    case ISD::LIFETIME_START:
+    case ISD::LIFETIME_END:
+    case ISD::CopyFromReg:
+    case ISD::CopyToReg:
+      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
+      break;
+    default:
+      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
+        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
+      break;
+    }
+    OpWorkCount[CurOpNumber]--;
+    if (OpWorkCount[CurOpNumber] == 0)
+      NumLeftToConsider--;
+  }
+
+  // If we've changed things around then replace token factor.
+  if (Changed) {
+    SDValue Result;
+    if (Ops.empty()) {
+      // The entry token is the only possible outcome.
+      Result = DAG.getEntryNode();
+    } else {
+      if (DidPruneOps) {
+        SmallVector<SDValue, 8> PrunedOps;
+        for (const SDValue &Op : Ops) {
+          if (SeenChains.count(Op.getNode()) == 0)
+            PrunedOps.push_back(Op);
+        }
+        Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
+      } else {
+        Result = DAG.getTokenFactor(SDLoc(N), Ops);
+      }
+    }
+    return Result;
+  }
+  return SDValue();
+}
+
+/// MERGE_VALUES can always be eliminated.
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
+  WorklistRemover DeadNodes(*this);
+  // Replacing results may cause a different MERGE_VALUES to suddenly
+  // be CSE'd with N, and carry its uses with it.
+  // Iterate until no uses remain, to ensure that the node can be safely
+  // deleted.
+  // First add the users of this node to the work list so that they
+  // can be tried again once they have new operands.
+  AddUsersToWorklist(N);
+  do {
+    // Do as a single replacement to avoid rewalking use lists.
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      Ops.push_back(N->getOperand(i));
+    DAG.ReplaceAllUsesWith(N, Ops.data());
+  } while (!N->use_empty());
+  deleteAndRecombine(N);
+  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+}
+
+/// If \p N is a ConstantSDNode with isOpaque() == false return it cast to a
+/// ConstantSDNode pointer, else nullptr.
+static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
+  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
+  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
+}
+
+SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
+  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
+         "Unexpected binary operator");
+
+  // Don't do this unless the old select is going away. We want to eliminate the
+  // binary operator, not replace a binop with a select.
+  // TODO: Handle ISD::SELECT_CC.
+  unsigned SelOpNo = 0;
+  SDValue Sel = BO->getOperand(0);
+  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
+    SelOpNo = 1;
+    Sel = BO->getOperand(1);
+  }
+
+  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
+    return SDValue();
+
+  SDValue CT = Sel.getOperand(1);
+  if (!isConstantOrConstantVector(CT, true) &&
+      !isConstantFPBuildVectorOrConstantFP(CT))
+    return SDValue();
+
+  SDValue CF = Sel.getOperand(2);
+  if (!isConstantOrConstantVector(CF, true) &&
+      !isConstantFPBuildVectorOrConstantFP(CF))
+    return SDValue();
+
+  // Bail out if any constants are opaque because we can't constant fold those.
+  // The exception is "and" and "or" with either 0 or -1 in which case we can
+  // propagate the non-constant operand into the select. I.e.:
+  // and (select Cond, 0, -1), X --> select Cond, 0, X
+  // or X, (select Cond, -1, 0) --> select Cond, -1, X
+  auto BinOpcode = BO->getOpcode();
+  bool CanFoldNonConst =
+      (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
+      (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
+      (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
+
+  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
+  if (!CanFoldNonConst &&
+      !isConstantOrConstantVector(CBO, true) &&
+      !isConstantFPBuildVectorOrConstantFP(CBO))
+    return SDValue();
+
+  EVT VT = Sel.getValueType();
+
+  // In the case of a shift, the value and the shift amount may have different
+  // VTs. For instance, on x86 the shift amount is i8 regardless of the LHS
+  // type. Bail out if we have swapped operands and the value types do not
+  // match. NB: x86 is fine if operands are not swapped and the shift amount VT
+  // is no bigger than the shifted value's. TODO: it is possible to check for a
+  // shift operation, correct the VTs and still perform the optimization on x86
+  // if needed.
+  if (SelOpNo && VT != CBO.getValueType())
+    return SDValue();
+
+  // We have a select-of-constants followed by a binary operator with a
+  // constant. Eliminate the binop by pulling the constant math into the select.
+  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
+  SDLoc DL(Sel);
+  SDValue NewCT = SelOpNo ?
DAG.getNode(BinOpcode, DL, VT, CBO, CT) + : DAG.getNode(BinOpcode, DL, VT, CT, CBO); + if (!CanFoldNonConst && !NewCT.isUndef() && + !isConstantOrConstantVector(NewCT, true) && + !isConstantFPBuildVectorOrConstantFP(NewCT)) + return SDValue(); + + SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF) + : DAG.getNode(BinOpcode, DL, VT, CF, CBO); + if (!CanFoldNonConst && !NewCF.isUndef() && + !isConstantOrConstantVector(NewCF, true) && + !isConstantFPBuildVectorOrConstantFP(NewCF)) + return SDValue(); + + SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF); + SelectOp->setFlags(BO->getFlags()); + return SelectOp; +} + +static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) { + assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && + "Expecting add or sub"); + + // Match a constant operand and a zext operand for the math instruction: + // add Z, C + // sub C, Z + bool IsAdd = N->getOpcode() == ISD::ADD; + SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0); + SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1); + auto *CN = dyn_cast<ConstantSDNode>(C); + if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND) + return SDValue(); + + // Match the zext operand as a setcc of a boolean. + if (Z.getOperand(0).getOpcode() != ISD::SETCC || + Z.getOperand(0).getValueType() != MVT::i1) + return SDValue(); + + // Match the compare as: setcc (X & 1), 0, eq. + SDValue SetCC = Z.getOperand(0); + ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get(); + if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) || + SetCC.getOperand(0).getOpcode() != ISD::AND || + !isOneConstant(SetCC.getOperand(0).getOperand(1))) + return SDValue(); + + // We are adding/subtracting a constant and an inverted low bit. Turn that + // into a subtract/add of the low bit with incremented/decremented constant: + // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1)) + // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1)) + EVT VT = C.getValueType(); + SDLoc DL(N); + SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT); + SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) : + DAG.getConstant(CN->getAPIntValue() - 1, DL, VT); + return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit); +} + +/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into +/// a shift and add with a different constant. +static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { + assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && + "Expecting add or sub"); + + // We need a constant operand for the add/sub, and the other operand is a + // logical shift right: add (srl), C or sub C, (srl). + // TODO - support non-uniform vector amounts. + bool IsAdd = N->getOpcode() == ISD::ADD; + SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0); + SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1); + ConstantSDNode *C = isConstOrConstSplat(ConstantOp); + if (!C || ShiftOp.getOpcode() != ISD::SRL) + return SDValue(); + + // The shift must be of a 'not' value. + SDValue Not = ShiftOp.getOperand(0); + if (!Not.hasOneUse() || !isBitwiseNot(Not)) + return SDValue(); + + // The shift must be moving the sign bit to the least-significant-bit. 
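+  // For an i32 value that means a shift amount of 31: (srl (not X), 31) is
+  // then exactly the inverted sign bit of X, i.e. 0 for negative X and 1
+  // otherwise.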
+ EVT VT = ShiftOp.getValueType(); + SDValue ShAmt = ShiftOp.getOperand(1); + ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); + if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1)) + return SDValue(); + + // Eliminate the 'not' by adjusting the shift and add/sub constant: + // add (srl (not X), 31), C --> add (sra X, 31), (C + 1) + // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1) + SDLoc DL(N); + auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL; + SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt); + APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1; + return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT)); +} + +/// Try to fold a node that behaves like an ADD (note that N isn't necessarily +/// an ISD::ADD here, it could for example be an ISD::OR if we know that there +/// are no common bits set in the operands). +SDValue DAGCombiner::visitADDLike(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + SDLoc DL(N); + + // fold vector ops + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (add x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N1; + } + + // fold (add x, undef) -> undef + if (N0.isUndef()) + return N0; + + if (N1.isUndef()) + return N1; + + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { + // canonicalize constant to RHS + if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::ADD, DL, VT, N1, N0); + // fold (add c1, c2) -> c1+c2 + return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(), + N1.getNode()); + } + + // fold (add x, 0) -> x + if (isNullConstant(N1)) + return N0; + + if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) { + // fold ((A-c1)+c2) -> (A+(c2-c1)) + if (N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) { + SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(), + N0.getOperand(1).getNode()); + assert(Sub && "Constant folding failed"); + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub); + } + + // fold ((c1-A)+c2) -> (c1+c2)-A + if (N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { + SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(), + N0.getOperand(0).getNode()); + assert(Add && "Constant folding failed"); + return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1)); + } + + // add (sext i1 X), 1 -> zext (not i1 X) + // We don't transform this pattern: + // add (zext i1 X), -1 -> sext (not i1 X) + // because most (?) targets generate better code for the zext form. + if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && + isOneOrOneSplat(N1)) { + SDValue X = N0.getOperand(0); + if ((!LegalOperations || + (TLI.isOperationLegal(ISD::XOR, X.getValueType()) && + TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) && + X.getScalarValueSizeInBits() == 1) { + SDValue Not = DAG.getNOT(DL, X, X.getValueType()); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not); + } + } + + // Undo the add -> or combine to merge constant offsets from a frame index. 
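+    // That is, (add (or FI, C1), C2) --> (add FI, (add C2, C1)) when FI and
+    // C1 share no common bits, so the 'or' is known to behave as an 'add'.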
+ if (N0.getOpcode() == ISD::OR && + isa<FrameIndexSDNode>(N0.getOperand(0)) && + isa<ConstantSDNode>(N0.getOperand(1)) && + DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) { + SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0); + } + } + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + + // reassociate add + if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) { + if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags())) + return RADD; + } + // fold ((0-A) + B) -> B-A + if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0))) + return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); + + // fold (A + (0-B)) -> A-B + if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0))) + return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1)); + + // fold (A+(B-A)) -> B + if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) + return N1.getOperand(0); + + // fold ((B-A)+A) -> B + if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) + return N0.getOperand(0); + + // fold ((A-B)+(C-A)) -> (C-B) + if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB && + N0.getOperand(0) == N1.getOperand(1)) + return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), + N0.getOperand(1)); + + // fold ((A-B)+(B-C)) -> (A-C) + if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB && + N0.getOperand(1) == N1.getOperand(0)) + return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), + N1.getOperand(1)); + + // fold (A+(B-(A+C))) to (B-C) + if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && + N0 == N1.getOperand(1).getOperand(0)) + return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), + N1.getOperand(1).getOperand(1)); + + // fold (A+(B-(C+A))) to (B-C) + if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && + N0 == N1.getOperand(1).getOperand(1)) + return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), + N1.getOperand(1).getOperand(0)); + + // fold (A+((B-A)+or-C)) to (B+or-C) + if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && + N1.getOperand(0).getOpcode() == ISD::SUB && + N0 == N1.getOperand(0).getOperand(1)) + return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0), + N1.getOperand(1)); + + // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant + if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + SDValue N10 = N1.getOperand(0); + SDValue N11 = N1.getOperand(1); + + if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10)) + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), + DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); + } + + // fold (add (umax X, C), -C) --> (usubsat X, C) + if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) { + auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) { + return (!Max && !Op) || + (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue())); + }; + if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT, + /*AllowUndefs*/ true)) + return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), + N0.getOperand(1)); + } + + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + if (isOneOrOneSplat(N1)) { + // fold (add (xor a, -1), 1) -> (sub 0, a) + if (isBitwiseNot(N0)) + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, 
VT), + N0.getOperand(0)); + + // fold (add (add (xor a, -1), b), 1) -> (sub b, a) + if (N0.getOpcode() == ISD::ADD || + N0.getOpcode() == ISD::UADDO || + N0.getOpcode() == ISD::SADDO) { + SDValue A, Xor; + + if (isBitwiseNot(N0.getOperand(0))) { + A = N0.getOperand(1); + Xor = N0.getOperand(0); + } else if (isBitwiseNot(N0.getOperand(1))) { + A = N0.getOperand(0); + Xor = N0.getOperand(1); + } + + if (Xor) + return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0)); + } + + // Look for: + // add (add x, y), 1 + // And if the target does not like this form then turn into: + // sub y, (xor x, -1) + if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() && + N0.getOpcode() == ISD::ADD) { + SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), + DAG.getAllOnesConstant(DL, VT)); + return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not); + } + } + + // (x - y) + -1 -> add (xor y, -1), x + if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + isAllOnesOrAllOnesSplat(N1)) { + SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1); + return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0)); + } + + if (SDValue Combined = visitADDLikeCommutative(N0, N1, N)) + return Combined; + + if (SDValue Combined = visitADDLikeCommutative(N1, N0, N)) + return Combined; + + return SDValue(); +} + +SDValue DAGCombiner::visitADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + SDLoc DL(N); + + if (SDValue Combined = visitADDLike(N)) + return Combined; + + if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) + return V; + + if (SDValue V = foldAddSubOfSignBit(N, DAG)) + return V; + + // fold (a+b) -> (a|b) iff a and b share no bits. + if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && + DAG.haveNoCommonBitsSet(N0, N1)) + return DAG.getNode(ISD::OR, DL, VT, N0, N1); + + return SDValue(); +} + +SDValue DAGCombiner::visitADDSAT(SDNode *N) { + unsigned Opcode = N->getOpcode(); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + SDLoc DL(N); + + // fold vector ops + if (VT.isVector()) { + // TODO SimplifyVBinOp + + // fold (add_sat x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N1; + } + + // fold (add_sat x, undef) -> -1 + if (N0.isUndef() || N1.isUndef()) + return DAG.getAllOnesConstant(DL, VT); + + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { + // canonicalize constant to RHS + if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(Opcode, DL, VT, N1, N0); + // fold (add_sat c1, c2) -> c3 + return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(), + N1.getNode()); + } + + // fold (add_sat x, 0) -> x + if (isNullConstant(N1)) + return N0; + + // If it cannot overflow, transform into an add. + if (Opcode == ISD::UADDSAT) + if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never) + return DAG.getNode(ISD::ADD, DL, VT, N0, N1); + + return SDValue(); +} + +static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) { + bool Masked = false; + + // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization. + while (true) { + if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) { + V = V.getOperand(0); + continue; + } + + if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) { + Masked = true; + V = V.getOperand(0); + continue; + } + + break; + } + + // If this is not a carry, return. 
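+  // Carries produced by the nodes matched below are always their second
+  // result, so a value taken from result number 0 cannot be one.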
+ if (V.getResNo() != 1) + return SDValue(); + + if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY && + V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO) + return SDValue(); + + EVT VT = V.getNode()->getValueType(0); + if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT)) + return SDValue(); + + // If the result is masked, then no matter what kind of bool it is we can + // return. If it isn't, then we need to make sure the bool type is either 0 or + // 1 and not other values. + if (Masked || + TLI.getBooleanContents(V.getValueType()) == + TargetLoweringBase::ZeroOrOneBooleanContent) + return V; + + return SDValue(); +} + +/// Given the operands of an add/sub operation, see if the 2nd operand is a +/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert +/// the opcode and bypass the mask operation. +static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, + SelectionDAG &DAG, const SDLoc &DL) { + if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1))) + return SDValue(); + + EVT VT = N0.getValueType(); + if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits()) + return SDValue(); + + // add N0, (and (AssertSext X, i1), 1) --> sub N0, X + // sub N0, (and (AssertSext X, i1), 1) --> add N0, X + return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0)); +} + +/// Helper for doing combines based on N0 and N1 being added to each other. +SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1, + SDNode *LocReference) { + EVT VT = N0.getValueType(); + SDLoc DL(LocReference); + + // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) + if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && + isNullOrNullSplat(N1.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, DL, VT, N0, + DAG.getNode(ISD::SHL, DL, VT, + N1.getOperand(0).getOperand(1), + N1.getOperand(1))); + + if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL)) + return V; + + // Look for: + // add (add x, 1), y + // And if the target does not like this form then turn into: + // sub y, (xor x, -1) + if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() && + N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) { + SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), + DAG.getAllOnesConstant(DL, VT)); + return DAG.getNode(ISD::SUB, DL, VT, N1, Not); + } + + // Hoist one-use subtraction by non-opaque constant: + // (x - C) + y -> (x + y) - C + // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors. + if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1)); + } + // Hoist one-use subtraction from non-opaque constant: + // (C - x) + y -> (y - x) + C + if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0)); + } + + // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1' + // rather than 'add 0/-1' (the zext should get folded). 
+  // add (sext i1 Y), X --> sub X, (zext i1 Y)
+  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
+      N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
+      TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
+    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
+  }
+
+  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
+  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
+    if (TN->getVT() == MVT::i1) {
+      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
+                                 DAG.getConstant(1, DL, VT));
+      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
+    }
+  }
+
+  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
+  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
+      N1.getResNo() == 0)
+    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
+                       N0, N1.getOperand(0), N1.getOperand(2));
+
+  // (add X, Carry) -> (addcarry X, 0, Carry)
+  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
+    if (SDValue Carry = getAsCarry(TLI, N1))
+      return DAG.getNode(ISD::ADDCARRY, DL,
+                         DAG.getVTList(VT, Carry.getValueType()), N0,
+                         DAG.getConstant(0, DL, VT), Carry);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitADDC(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N0.getValueType();
+  SDLoc DL(N);
+
+  // If the flag result is dead, turn this into an ADD.
+  if (!N->hasAnyUseOfValue(1))
+    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+  // canonicalize constant to RHS.
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
+
+  // fold (addc x, 0) -> x + no carry out
+  if (isNullConstant(N1))
+    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
+                                        DL, MVT::Glue));
+
+  // If it cannot overflow, transform into an add.
+  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+  return SDValue();
+}
+
+static SDValue flipBoolean(SDValue V, const SDLoc &DL,
+                           SelectionDAG &DAG, const TargetLowering &TLI) {
+  EVT VT = V.getValueType();
+
+  SDValue Cst;
+  switch (TLI.getBooleanContents(VT)) {
+  case TargetLowering::ZeroOrOneBooleanContent:
+  case TargetLowering::UndefinedBooleanContent:
+    Cst = DAG.getConstant(1, DL, VT);
+    break;
+  case TargetLowering::ZeroOrNegativeOneBooleanContent:
+    Cst = DAG.getAllOnesConstant(DL, VT);
+    break;
+  }
+
+  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
+}
+
+/**
+ * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
+ * then the flip also occurs if computing the inverse is the same cost.
+ * This function returns an empty SDValue in case it cannot flip the boolean
+ * without increasing the cost of the computation. If you want to flip a
+ * boolean no matter what, use flipBoolean.
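+ *
+ * For example, with ZeroOrOneBooleanContent a boolean computed as
+ * (xor x, 1) is flipped for free by simply returning x, whereas flipping a
+ * plain setcc would require emitting a new xor; the latter is done only
+ * when Force is set.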
+ */ +static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, + const TargetLowering &TLI, + bool Force) { + if (Force && isa<ConstantSDNode>(V)) + return flipBoolean(V, SDLoc(V), DAG, TLI); + + if (V.getOpcode() != ISD::XOR) + return SDValue(); + + ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false); + if (!Const) + return SDValue(); + + EVT VT = V.getValueType(); + + bool IsFlip = false; + switch(TLI.getBooleanContents(VT)) { + case TargetLowering::ZeroOrOneBooleanContent: + IsFlip = Const->isOne(); + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + IsFlip = Const->isAllOnesValue(); + break; + case TargetLowering::UndefinedBooleanContent: + IsFlip = (Const->getAPIntValue() & 0x01) == 1; + break; + } + + if (IsFlip) + return V.getOperand(0); + if (Force) + return flipBoolean(V, SDLoc(V), DAG, TLI); + return SDValue(); +} + +SDValue DAGCombiner::visitADDO(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + bool IsSigned = (ISD::SADDO == N->getOpcode()); + + EVT CarryVT = N->getValueType(1); + SDLoc DL(N); + + // If the flag result is dead, turn this into an ADD. + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), + DAG.getUNDEF(CarryVT)); + + // canonicalize constant to RHS. + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0); + + // fold (addo x, 0) -> x + no carry out + if (isNullOrNullSplat(N1)) + return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT)); + + if (!IsSigned) { + // If it cannot overflow, transform into an add. + if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never) + return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), + DAG.getConstant(0, DL, CarryVT)); + + // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry. + if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) { + SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(), + DAG.getConstant(0, DL, VT), N0.getOperand(0)); + return CombineTo(N, Sub, + flipBoolean(Sub.getValue(1), DL, DAG, TLI)); + } + + if (SDValue Combined = visitUADDOLike(N0, N1, N)) + return Combined; + + if (SDValue Combined = visitUADDOLike(N1, N0, N)) + return Combined; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) { + EVT VT = N0.getValueType(); + if (VT.isVector()) + return SDValue(); + + // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) + // If Y + 1 cannot overflow. 
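+  // If Y + 1 cannot overflow, then Y + 0 + Carry (Carry being at most 1)
+  // cannot overflow either, so the inner addcarry never produces a carry and
+  // folding it into a single addcarry is safe.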
+  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
+    SDValue Y = N1.getOperand(0);
+    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
+    if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
+      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
+                         N1.getOperand(2));
+  }
+
+  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
+  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
+    if (SDValue Carry = getAsCarry(TLI, N1))
+      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
+                         DAG.getConstant(0, SDLoc(N), VT), Carry);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitADDE(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue CarryIn = N->getOperand(2);
+
+  // canonicalize constant to RHS
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
+                       N1, N0, CarryIn);
+
+  // fold (adde x, y, false) -> (addc x, y)
+  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue CarryIn = N->getOperand(2);
+  SDLoc DL(N);
+
+  // canonicalize constant to RHS
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
+
+  // fold (addcarry x, y, false) -> (uaddo x, y)
+  if (isNullConstant(CarryIn)) {
+    if (!LegalOperations ||
+        TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
+      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
+  }
+
+  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
+  if (isNullConstant(N0) && isNullConstant(N1)) {
+    EVT VT = N0.getValueType();
+    EVT CarryVT = CarryIn.getValueType();
+    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
+    AddToWorklist(CarryExt.getNode());
+    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
+                                    DAG.getConstant(1, DL, VT)),
+                     DAG.getConstant(0, DL, CarryVT));
+  }
+
+  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
+    return Combined;
+
+  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
+    return Combined;
+
+  return SDValue();
+}
+
+/**
+ * If we are facing some sort of diamond carry propagation pattern, try to
+ * break it up to generate something like:
+ *   (addcarry X, 0, (addcarry A, B, Z):Carry)
+ *
+ * The end result is usually an increase in the number of operations required,
+ * but because the carry is now linearized, other transforms can kick in and
+ * optimize the DAG.
+ *
+ * Patterns typically look something like
+ *            (uaddo A, B)
+ *             /       \
+ *          Carry      Sum
+ *            |          \
+ *            | (addcarry *, 0, Z)
+ *            |       /
+ *             \   Carry
+ *              |   /
+ *   (addcarry X, *, *)
+ *
+ * But numerous variations exist. Our goal is to identify A, B, X and Z and
+ * produce a combine with a single path for carry propagation.
+ */
+static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
+                                      SDValue X, SDValue Carry0, SDValue Carry1,
+                                      SDNode *N) {
+  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
+    return SDValue();
+  if (Carry1.getOpcode() != ISD::UADDO)
+    return SDValue();
+
+  SDValue Z;
+
+  /**
+   * First look for a suitable Z.
+   * It will present itself in the form of (addcarry Y, 0, Z) or its
+   * equivalent (uaddo Y, 1) for Z=true.
+   */
+  if (Carry0.getOpcode() == ISD::ADDCARRY &&
+      isNullConstant(Carry0.getOperand(1))) {
+    Z = Carry0.getOperand(2);
+  } else if (Carry0.getOpcode() == ISD::UADDO &&
+             isOneConstant(Carry0.getOperand(1))) {
+    EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
+    Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
+  } else {
+    // We couldn't find a suitable Z.
+    return SDValue();
+  }
+
+  auto cancelDiamond = [&](SDValue A, SDValue B) {
+    SDLoc DL(N);
+    SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
+    Combiner.AddToWorklist(NewY.getNode());
+    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
+                       DAG.getConstant(0, DL, X.getValueType()),
+                       NewY.getValue(1));
+  };
+
+  /**
+   *       (uaddo A, B)
+   *            |
+   *           Sum
+   *            |
+   *    (addcarry *, 0, Z)
+   */
+  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
+    return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
+  }
+
+  /**
+   *   (addcarry A, 0, Z)
+   *            |
+   *           Sum
+   *            |
+   *      (uaddo *, B)
+   */
+  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
+    return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
+  }
+
+  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
+    return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
+                                       SDNode *N) {
+  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
+  if (isBitwiseNot(N0))
+    if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
+      SDLoc DL(N);
+      SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
+                                N0.getOperand(0), NotC);
+      return CombineTo(N, Sub,
+                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+    }
+
+  // Iff the flag result is dead:
+  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
+  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
+  // or the dependency between the instructions.
+  if ((N0.getOpcode() == ISD::ADD ||
+       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
+        N0.getValue(1) != CarryIn)) &&
+      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
+    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
+                       N0.getOperand(0), N0.getOperand(1), CarryIn);
+
+  /**
+   * When one of the addcarry arguments is itself a carry, we may be facing
+   * a diamond carry propagation. In that case we try to transform the DAG
+   * to ensure linear carry propagation if that is possible.
+   */
+  if (auto Y = getAsCarry(TLI, N1)) {
+    // Because both are carries, Y and Z can be swapped.
+    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
+      return R;
+    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
+      return R;
+  }
+
+  return SDValue();
+}
+
+// Since it may not be valid to emit a fold to zero for vector initializers,
+// check if we can before folding.
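+// For vectors the zero is materialized with a BUILD_VECTOR, so once
+// operations have been legalized the fold is only emitted when that opcode
+// is legal for the type.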
+static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, + SelectionDAG &DAG, bool LegalOperations) { + if (!VT.isVector()) + return DAG.getConstant(0, DL, VT); + if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) + return DAG.getConstant(0, DL, VT); + return SDValue(); +} + +SDValue DAGCombiner::visitSUB(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + SDLoc DL(N); + + // fold vector ops + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (sub x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + } + + // fold (sub x, x) -> 0 + // FIXME: Refactor this and xor and other similar operations together. + if (N0 == N1) + return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + // fold (sub c1, c2) -> c1-c2 + return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(), + N1.getNode()); + } + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); + + // fold (sub x, c) -> (add x, -c) + if (N1C) { + return DAG.getNode(ISD::ADD, DL, VT, N0, + DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); + } + + if (isNullOrNullSplat(N0)) { + unsigned BitWidth = VT.getScalarSizeInBits(); + // Right-shifting everything out but the sign bit followed by negation is + // the same as flipping arithmetic/logical shift type without the negation: + // -(X >>u 31) -> (X >>s 31) + // -(X >>s 31) -> (X >>u 31) + if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) { + ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1)); + if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) { + auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA; + if (!LegalOperations || TLI.isOperationLegal(NewSh, VT)) + return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1)); + } + } + + // 0 - X --> 0 if the sub is NUW. + if (N->getFlags().hasNoUnsignedWrap()) + return N0; + + if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) { + // N1 is either 0 or the minimum signed value. If the sub is NSW, then + // N1 must be 0 because negating the minimum signed value is undefined. + if (N->getFlags().hasNoSignedWrap()) + return N0; + + // 0 - X --> X if X is 0 or the minimum signed value. + return N1; + } + } + + // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) + if (isAllOnesOrAllOnesSplat(N0)) + return DAG.getNode(ISD::XOR, DL, VT, N1, N0); + + // fold (A - (0-B)) -> A+B + if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0))) + return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1)); + + // fold A-(A-B) -> B + if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) + return N1.getOperand(1); + + // fold (A+B)-A -> B + if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) + return N0.getOperand(1); + + // fold (A+B)-B -> A + if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) + return N0.getOperand(0); + + // fold (A+C1)-C2 -> A+(C1-C2) + if (N0.getOpcode() == ISD::ADD && + isConstantOrConstantVector(N1, /* NoOpaques */ true) && + isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { + SDValue NewC = DAG.FoldConstantArithmetic( + ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode()); + assert(NewC && "Constant folding failed"); + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC); + } + + // fold C2-(A+C1) -> (C2-C1)-A + if (N1.getOpcode() == ISD::ADD) { + SDValue N11 = N1.getOperand(1); + if (isConstantOrConstantVector(N0, /* NoOpaques */ true) && + isConstantOrConstantVector(N11, /* NoOpaques */ true)) { + SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(), + N11.getNode()); + assert(NewC && "Constant folding failed"); + return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); + } + } + + // fold (A-C1)-C2 -> A-(C1+C2) + if (N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N1, /* NoOpaques */ true) && + isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { + SDValue NewC = DAG.FoldConstantArithmetic( + ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode()); + assert(NewC && "Constant folding failed"); + return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC); + } + + // fold (c1-A)-c2 -> (c1-c2)-A + if (N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N1, /* NoOpaques */ true) && + isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) { + SDValue NewC = DAG.FoldConstantArithmetic( + ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode()); + assert(NewC && "Constant folding failed"); + return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1)); + } + + // fold ((A+(B+or-C))-B) -> A+or-C + if (N0.getOpcode() == ISD::ADD && + (N0.getOperand(1).getOpcode() == ISD::SUB || + N0.getOperand(1).getOpcode() == ISD::ADD) && + N0.getOperand(1).getOperand(0) == N1) + return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0), + N0.getOperand(1).getOperand(1)); + + // fold ((A+(C+B))-B) -> A+C + if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD && + N0.getOperand(1).getOperand(1) == N1) + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), + N0.getOperand(1).getOperand(0)); + + // fold ((A-(B-C))-C) -> A-B + if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB && + N0.getOperand(1).getOperand(1) == N1) + return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), + N0.getOperand(1).getOperand(0)); + + // fold (A-(B-C)) -> A+(C-B) + if (N1.getOpcode() == ISD::SUB && N1.hasOneUse()) + return DAG.getNode(ISD::ADD, DL, VT, N0, + DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1), + N1.getOperand(0))); + + // fold (X - (-Y * Z)) -> (X + (Y * Z)) + if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) { + if (N1.getOperand(0).getOpcode() == ISD::SUB && + isNullOrNullSplat(N1.getOperand(0).getOperand(0))) { + SDValue Mul = DAG.getNode(ISD::MUL, 
DL, VT, + N1.getOperand(0).getOperand(1), + N1.getOperand(1)); + return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); + } + if (N1.getOperand(1).getOpcode() == ISD::SUB && + isNullOrNullSplat(N1.getOperand(1).getOperand(0))) { + SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, + N1.getOperand(0), + N1.getOperand(1).getOperand(1)); + return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); + } + } + + // If either operand of a sub is undef, the result is undef + if (N0.isUndef()) + return N0; + if (N1.isUndef()) + return N1; + + if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) + return V; + + if (SDValue V = foldAddSubOfSignBit(N, DAG)) + return V; + + if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N))) + return V; + + // (x - y) - 1 -> add (xor y, -1), x + if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) { + SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), + DAG.getAllOnesConstant(DL, VT)); + return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0)); + } + + // Look for: + // sub y, (xor x, -1) + // And if the target does not like this form then turn into: + // add (add x, y), 1 + if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) { + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0)); + return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT)); + } + + // Hoist one-use addition by non-opaque constant: + // (x + C) - y -> (x - y) + C + if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD && + isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1)); + } + // y - (x + C) -> (y - x) - C + if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD && + isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1)); + } + // (x - C) - y -> (x - y) - C + // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors. + if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1)); + } + // (C - x) - y -> C - (x + y) + if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) { + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1); + return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add); + } + + // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1' + // rather than 'sub 0/1' (the sext should get folded). 
+ // sub X, (zext i1 Y) --> add X, (sext i1 Y) + if (N1.getOpcode() == ISD::ZERO_EXTEND && + N1.getOperand(0).getScalarValueSizeInBits() == 1 && + TLI.getBooleanContents(VT) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { + SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0)); + return DAG.getNode(ISD::ADD, DL, VT, N0, SExt); + } + + // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X) + if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { + if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) { + SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1); + SDValue S0 = N1.getOperand(0); + if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) { + unsigned OpSizeInBits = VT.getScalarSizeInBits(); + if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1))) + if (C->getAPIntValue() == (OpSizeInBits - 1)) + return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0); + } + } + } + + // If the relocation model supports it, consider symbol offsets. + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) + if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { + // fold (sub Sym, c) -> Sym-c + if (N1C && GA->getOpcode() == ISD::GlobalAddress) + return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, + GA->getOffset() - + (uint64_t)N1C->getSExtValue()); + // fold (sub Sym+c1, Sym+c2) -> c1-c2 + if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) + if (GA->getGlobal() == GB->getGlobal()) + return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), + DL, VT); + } + + // sub X, (sextinreg Y i1) -> add X, (and Y 1) + if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { + VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); + if (TN->getVT() == MVT::i1) { + SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), + DAG.getConstant(1, DL, VT)); + return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); + } + } + + // Prefer an add for more folding potential and possibly better codegen: + // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1) + if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) { + SDValue ShAmt = N1.getOperand(1); + ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); + if (ShAmtC && + ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) { + SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt); + return DAG.getNode(ISD::ADD, DL, VT, N0, SRA); + } + } + + if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) { + // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry) + if (SDValue Carry = getAsCarry(TLI, N0)) { + SDValue X = N1; + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X); + return DAG.getNode(ISD::ADDCARRY, DL, + DAG.getVTList(VT, Carry.getValueType()), NegX, Zero, + Carry); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSUBSAT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + SDLoc DL(N); + + // fold vector ops + if (VT.isVector()) { + // TODO SimplifyVBinOp + + // fold (sub_sat x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + } + + // fold (sub_sat x, undef) -> 0 + if (N0.isUndef() || N1.isUndef()) + return DAG.getConstant(0, DL, VT); + + // fold (sub_sat x, x) -> 0 + if (N0 == N1) + return DAG.getConstant(0, DL, VT); + + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + // fold (sub_sat c1, c2) -> c3 + return DAG.FoldConstantArithmetic(N->getOpcode(), 
+                                      DL, VT, N0.getNode(), N1.getNode());
+  }
+
+  // fold (sub_sat x, 0) -> x
+  if (isNullConstant(N1))
+    return N0;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBC(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N0.getValueType();
+  SDLoc DL(N);
+
+  // If the flag result is dead, turn this into a SUB.
+  if (!N->hasAnyUseOfValue(1))
+    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+  // fold (subc x, x) -> 0 + no borrow
+  if (N0 == N1)
+    return CombineTo(N, DAG.getConstant(0, DL, VT),
+                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+  // fold (subc x, 0) -> x + no borrow
+  if (isNullConstant(N1))
+    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+  if (isAllOnesConstant(N0))
+    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
+                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBO(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N0.getValueType();
+  bool IsSigned = (ISD::SSUBO == N->getOpcode());
+
+  EVT CarryVT = N->getValueType(1);
+  SDLoc DL(N);
+
+  // If the flag result is dead, turn this into a SUB.
+  if (!N->hasAnyUseOfValue(1))
+    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+                     DAG.getUNDEF(CarryVT));
+
+  // fold (subo x, x) -> 0 + no borrow
+  if (N0 == N1)
+    return CombineTo(N, DAG.getConstant(0, DL, VT),
+                     DAG.getConstant(0, DL, CarryVT));
+
+  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+
+  // fold (subo x, c) -> (addo x, -c)
+  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
+    return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
+                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
+  }
+
+  // fold (subo x, 0) -> x + no borrow
+  if (isNullOrNullSplat(N1))
+    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
+
+  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
+    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
+                     DAG.getConstant(0, DL, CarryVT));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBE(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue CarryIn = N->getOperand(2);
+
+  // fold (sube x, y, false) -> (subc x, y)
+  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue CarryIn = N->getOperand(2);
+
+  // fold (subcarry x, y, false) -> (usubo x, y)
+  if (isNullConstant(CarryIn)) {
+    if (!LegalOperations ||
+        TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
+      return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
+  }
+
+  return SDValue();
+}
+
+// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
+// UMULFIXSAT here.
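+// All four opcodes share the same operand layout: the two multiplicands plus
+// a 'scale' operand giving the number of fractional bits, which is why the
+// folds below can rebuild the node generically from N->getOpcode().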
+SDValue DAGCombiner::visitMULFIX(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue Scale = N->getOperand(2); + EVT VT = N0.getValueType(); + + // fold (mulfix x, undef, scale) -> 0 + if (N0.isUndef() || N1.isUndef()) + return DAG.getConstant(0, SDLoc(N), VT); + + // Canonicalize constant to RHS (vector doesn't have to splat) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale); + + // fold (mulfix x, 0, scale) -> 0 + if (isNullConstant(N1)) + return DAG.getConstant(0, SDLoc(N), VT); + + return SDValue(); +} + +SDValue DAGCombiner::visitMUL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + + // fold (mul x, undef) -> 0 + if (N0.isUndef() || N1.isUndef()) + return DAG.getConstant(0, SDLoc(N), VT); + + bool N0IsConst = false; + bool N1IsConst = false; + bool N1IsOpaqueConst = false; + bool N0IsOpaqueConst = false; + APInt ConstValue0, ConstValue1; + // fold vector ops + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0); + N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); + assert((!N0IsConst || + ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) && + "Splat APInt should be element width"); + assert((!N1IsConst || + ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) && + "Splat APInt should be element width"); + } else { + N0IsConst = isa<ConstantSDNode>(N0); + if (N0IsConst) { + ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue(); + N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque(); + } + N1IsConst = isa<ConstantSDNode>(N1); + if (N1IsConst) { + ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue(); + N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque(); + } + } + + // fold (mul c1, c2) -> c1*c2 + if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst) + return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, + N0.getNode(), N1.getNode()); + + // canonicalize constant to RHS (vector doesn't have to splat) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); + // fold (mul x, 0) -> 0 + if (N1IsConst && ConstValue1.isNullValue()) + return N1; + // fold (mul x, 1) -> x + if (N1IsConst && ConstValue1.isOneValue()) + return N0; + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + + // fold (mul x, -1) -> 0-x + if (N1IsConst && ConstValue1.isAllOnesValue()) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), N0); + } + // fold (mul x, (1 << c)) -> x << c + if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && + DAG.isKnownToBeAPowerOfTwo(N1) && + (!VT.isVector() || Level <= AfterLegalizeVectorOps)) { + SDLoc DL(N); + SDValue LogBase2 = BuildLogBase2(N1, DL); + EVT ShiftVT = getShiftAmountTy(N0.getValueType()); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); + return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc); + } + // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c + if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) { + unsigned Log2Val = (-ConstValue1).logBase2(); + SDLoc DL(N); + // FIXME: If the input is something that is easily negated (e.g. a + // single-use add), we should put the negate there. 
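+    // For example (editor's note): (mul x, -8) --> (sub 0, (shl x, 3)).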
+ return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), + DAG.getNode(ISD::SHL, DL, VT, N0, + DAG.getConstant(Log2Val, DL, + getShiftAmountTy(N0.getValueType())))); + } + + // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub. + // mul x, (2^N + 1) --> add (shl x, N), x + // mul x, (2^N - 1) --> sub (shl x, N), x + // Examples: x * 33 --> (x << 5) + x + // x * 15 --> (x << 4) - x + // x * -33 --> -((x << 5) + x) + // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4) + if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) { + // TODO: We could handle more general decomposition of any constant by + // having the target set a limit on number of ops and making a + // callback to determine that sequence (similar to sqrt expansion). + unsigned MathOp = ISD::DELETED_NODE; + APInt MulC = ConstValue1.abs(); + if ((MulC - 1).isPowerOf2()) + MathOp = ISD::ADD; + else if ((MulC + 1).isPowerOf2()) + MathOp = ISD::SUB; + + if (MathOp != ISD::DELETED_NODE) { + unsigned ShAmt = + MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2(); + assert(ShAmt < VT.getScalarSizeInBits() && + "multiply-by-constant generated out of bounds shift"); + SDLoc DL(N); + SDValue Shl = + DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT)); + SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0); + if (ConstValue1.isNegative()) + R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R); + return R; + } + } + + // (mul (shl X, c1), c2) -> (mul X, c2 << c1) + if (N0.getOpcode() == ISD::SHL && + isConstantOrConstantVector(N1, /* NoOpaques */ true) && + isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { + SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); + if (isConstantOrConstantVector(C3)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); + } + + // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one + // use. + { + SDValue Sh(nullptr, 0), Y(nullptr, 0); + + // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). + if (N0.getOpcode() == ISD::SHL && + isConstantOrConstantVector(N0.getOperand(1)) && + N0.getNode()->hasOneUse()) { + Sh = N0; Y = N1; + } else if (N1.getOpcode() == ISD::SHL && + isConstantOrConstantVector(N1.getOperand(1)) && + N1.getNode()->hasOneUse()) { + Sh = N1; Y = N0; + } + + if (Sh.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y); + return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1)); + } + } + + // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) + if (DAG.isConstantIntBuildVectorOrConstantInt(N1) && + N0.getOpcode() == ISD::ADD && + DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && + isMulAddWithConstProfitable(N, N0, N1)) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, + DAG.getNode(ISD::MUL, SDLoc(N0), VT, + N0.getOperand(0), N1), + DAG.getNode(ISD::MUL, SDLoc(N1), VT, + N0.getOperand(1), N1)); + + // reassociate mul + if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags())) + return RMUL; + + return SDValue(); +} + +/// Return true if divmod libcall is available. +static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, + const TargetLowering &TLI) { + RTLIB::Libcall LC; + EVT NodeType = Node->getValueType(0); + if (!NodeType.isSimple()) + return false; + switch (NodeType.getSimpleVT().SimpleTy) { + default: return false; // No libcall for vector types. + case MVT::i8: LC= isSigned ? 
RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; + } + + return TLI.getLibcallName(LC) != nullptr; +} + +/// Issue divrem if both quotient and remainder are needed. +SDValue DAGCombiner::useDivRem(SDNode *Node) { + if (Node->use_empty()) + return SDValue(); // This is a dead node, leave it alone. + + unsigned Opcode = Node->getOpcode(); + bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; + + // DivMod lib calls can still work on non-legal types if using lib-calls. + EVT VT = Node->getValueType(0); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT)) + return SDValue(); + + // If DIVREM is going to get expanded into a libcall, + // but there is no libcall available, then don't combine. + if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) && + !isDivRemLibcallAvailable(Node, isSigned, TLI)) + return SDValue(); + + // If div is legal, it's better to do the normal expansion + unsigned OtherOpcode = 0; + if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) { + OtherOpcode = isSigned ? ISD::SREM : ISD::UREM; + if (TLI.isOperationLegalOrCustom(Opcode, VT)) + return SDValue(); + } else { + OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV; + if (TLI.isOperationLegalOrCustom(OtherOpcode, VT)) + return SDValue(); + } + + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue combined; + for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node || User->getOpcode() == ISD::DELETED_NODE || + User->use_empty()) + continue; + // Convert the other matching node(s), too; + // otherwise, the DIVREM may get target-legalized into something + // target-specific that we won't be able to recognize. + unsigned UserOpc = User->getOpcode(); + if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) && + User->getOperand(0) == Op0 && + User->getOperand(1) == Op1) { + if (!combined) { + if (UserOpc == OtherOpcode) { + SDVTList VTs = DAG.getVTList(VT, VT); + combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1); + } else if (UserOpc == DivRemOpc) { + combined = SDValue(User, 0); + } else { + assert(UserOpc == Opcode); + continue; + } + } + if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV) + CombineTo(User, combined); + else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM) + CombineTo(User, combined.getValue(1)); + } + } + return combined; +} + +static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + unsigned Opc = N->getOpcode(); + bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + + // X / undef -> undef + // X % undef -> undef + // X / 0 -> undef + // X % 0 -> undef + // NOTE: This includes vectors where any divisor element is zero/undef. 
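+  // For instance (editor's note): X / <1, 2, 0, 4> folds to undef because
+  // one divisor element is zero.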
+  if (DAG.isUndef(Opc, {N0, N1}))
+    return DAG.getUNDEF(VT);
+
+  // undef / X -> 0
+  // undef % X -> 0
+  if (N0.isUndef())
+    return DAG.getConstant(0, DL, VT);
+
+  // 0 / X -> 0
+  // 0 % X -> 0
+  ConstantSDNode *N0C = isConstOrConstSplat(N0);
+  if (N0C && N0C->isNullValue())
+    return N0;
+
+  // X / X -> 1
+  // X % X -> 0
+  if (N0 == N1)
+    return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
+
+  // X / 1 -> X
+  // X % 1 -> 0
+  // If this is a boolean op (single-bit element type), we can't have
+  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
+  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
+  // it's a 1.
+  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
+    return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIV(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N->getValueType(0);
+  EVT CCVT = getSetCCResultType(VT);
+
+  // fold vector ops
+  if (VT.isVector())
+    if (SDValue FoldedVOp = SimplifyVBinOp(N))
+      return FoldedVOp;
+
+  SDLoc DL(N);
+
+  // fold (sdiv c1, c2) -> c1/c2
+  ConstantSDNode *N0C = isConstOrConstSplat(N0);
+  ConstantSDNode *N1C = isConstOrConstSplat(N1);
+  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
+    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
+  // fold (sdiv X, -1) -> 0-X
+  if (N1C && N1C->isAllOnesValue())
+    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
+  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
+  if (N1C && N1C->getAPIntValue().isMinSignedValue())
+    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+                         DAG.getConstant(1, DL, VT),
+                         DAG.getConstant(0, DL, VT));
+
+  if (SDValue V = simplifyDivRem(N, DAG))
+    return V;
+
+  if (SDValue NewSel = foldBinOpIntoSelect(N))
+    return NewSel;
+
+  // If we know the sign bits of both operands are zero, strength reduce to a
+  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
+  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
+
+  if (SDValue V = visitSDIVLike(N0, N1, N)) {
+    // If the corresponding remainder node exists, update its users with
+    // (Dividend - (Quotient * Divisor)).
+    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
+                                              { N0, N1 })) {
+      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
+      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+      AddToWorklist(Mul.getNode());
+      AddToWorklist(Sub.getNode());
+      CombineTo(RemNode, Sub);
+    }
+    return V;
+  }
+
+  // sdiv, srem -> sdivrem
+  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+  // true. Otherwise, we break the simplification logic in visitREM().
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+    if (SDValue DivRem = useDivRem(N))
+      return DivRem;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  EVT CCVT = getSetCCResultType(VT);
+  unsigned BitWidth = VT.getScalarSizeInBits();
+
+  // Helper for determining whether a value is a power-of-2 constant scalar
+  // or a vector of such elements.
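+  // (Editor's note: both positive and negative powers of two qualify, e.g.
+  // 16 and -16; zero and opaque constants are rejected.)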
+ auto IsPowerOfTwo = [](ConstantSDNode *C) { + if (C->isNullValue() || C->isOpaque()) + return false; + if (C->getAPIntValue().isPowerOf2()) + return true; + if ((-C->getAPIntValue()).isPowerOf2()) + return true; + return false; + }; + + // fold (sdiv X, pow2) -> simple ops after legalize + // FIXME: We check for the exact bit here because the generic lowering gives + // better results in that case. The target-specific lowering should learn how + // to handle exact sdivs efficiently. + if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) { + // Target-specific implementation of sdiv x, pow2. + if (SDValue Res = BuildSDIVPow2(N)) + return Res; + + // Create constants that are functions of the shift amount value. + EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType()); + SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy); + SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1); + C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy); + SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1); + if (!isConstantOrConstantVector(Inexact)) + return SDValue(); + + // Splat the sign bit into the register + SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0, + DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy)); + AddToWorklist(Sign.getNode()); + + // Add (N0 < 0) ? abs2 - 1 : 0; + SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact); + AddToWorklist(Srl.getNode()); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl); + AddToWorklist(Add.getNode()); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1); + AddToWorklist(Sra.getNode()); + + // Special case: (sdiv X, 1) -> X + // Special Case: (sdiv X, -1) -> 0-X + SDValue One = DAG.getConstant(1, DL, VT); + SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); + SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ); + SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ); + SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes); + Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra); + + // If dividing by a positive value, we're done. Otherwise, the result must + // be negated. + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra); + + // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding. + SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT); + SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra); + return Res; + } + + // If integer divide is expensive and we satisfy the requirements, emit an + // alternate sequence. Targets may check function attributes for size/speed + // trade-offs. 
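+  // Editor's illustration (approximate; the exact sequence is target- and
+  // constant-dependent): BuildSDIV replaces the divide with a multiply by a
+  // precomputed "magic" reciprocal. For a 32-bit sdiv by 7, the classic
+  // Hacker's Delight sequence is roughly:
+  //   q = mulhs(x, 0x92492493); q = q + x; q = sra(q, 2); q = q + srl(q, 31);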
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+  if (isConstantOrConstantVector(N1) &&
+      !TLI.isIntDivCheap(N->getValueType(0), Attr))
+    if (SDValue Op = BuildSDIV(N))
+      return Op;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIV(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N->getValueType(0);
+  EVT CCVT = getSetCCResultType(VT);
+
+  // fold vector ops
+  if (VT.isVector())
+    if (SDValue FoldedVOp = SimplifyVBinOp(N))
+      return FoldedVOp;
+
+  SDLoc DL(N);
+
+  // fold (udiv c1, c2) -> c1/c2
+  ConstantSDNode *N0C = isConstOrConstSplat(N0);
+  ConstantSDNode *N1C = isConstOrConstSplat(N1);
+  if (N0C && N1C)
+    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
+                                                    N0C, N1C))
+      return Folded;
+  // fold (udiv X, -1) -> select(X == -1, 1, 0)
+  if (N1C && N1C->getAPIntValue().isAllOnesValue())
+    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+                         DAG.getConstant(1, DL, VT),
+                         DAG.getConstant(0, DL, VT));
+
+  if (SDValue V = simplifyDivRem(N, DAG))
+    return V;
+
+  if (SDValue NewSel = foldBinOpIntoSelect(N))
+    return NewSel;
+
+  if (SDValue V = visitUDIVLike(N0, N1, N)) {
+    // If the corresponding remainder node exists, update its users with
+    // (Dividend - (Quotient * Divisor)).
+    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
+                                              { N0, N1 })) {
+      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
+      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+      AddToWorklist(Mul.getNode());
+      AddToWorklist(Sub.getNode());
+      CombineTo(RemNode, Sub);
+    }
+    return V;
+  }
+
+  // udiv, urem -> udivrem
+  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+  // true. Otherwise, we break the simplification logic in visitREM().
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue DivRem = useDivRem(N)) + return DivRem; + + return SDValue(); +} + +SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + // fold (udiv x, (1 << c)) -> x >>u c + if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && + DAG.isKnownToBeAPowerOfTwo(N1)) { + SDValue LogBase2 = BuildLogBase2(N1, DL); + AddToWorklist(LogBase2.getNode()); + + EVT ShiftVT = getShiftAmountTy(N0.getValueType()); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); + AddToWorklist(Trunc.getNode()); + return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc); + } + + // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 + if (N1.getOpcode() == ISD::SHL) { + SDValue N10 = N1.getOperand(0); + if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) && + DAG.isKnownToBeAPowerOfTwo(N10)) { + SDValue LogBase2 = BuildLogBase2(N10, DL); + AddToWorklist(LogBase2.getNode()); + + EVT ADDVT = N1.getOperand(1).getValueType(); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT); + AddToWorklist(Trunc.getNode()); + SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::SRL, DL, VT, N0, Add); + } + } + + // fold (udiv x, c) -> alternate + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + if (isConstantOrConstantVector(N1) && + !TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue Op = BuildUDIV(N)) + return Op; + + return SDValue(); +} + +// handles ISD::SREM and ISD::UREM +SDValue DAGCombiner::visitREM(SDNode *N) { + unsigned Opcode = N->getOpcode(); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + EVT CCVT = getSetCCResultType(VT); + + bool isSigned = (Opcode == ISD::SREM); + SDLoc DL(N); + + // fold (rem c1, c2) -> c1%c2 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) + return Folded; + // fold (urem X, -1) -> select(X == -1, 0, x) + if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue()) + return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), + DAG.getConstant(0, DL, VT), N0); + + if (SDValue V = simplifyDivRem(N, DAG)) + return V; + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + + if (isSigned) { + // If we know the sign bits of both operands are zero, strength reduce to a + // urem instead. 
Handles (X & 0x0FFFFFFF) %s 16 -> X&15 + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UREM, DL, VT, N0, N1); + } else { + SDValue NegOne = DAG.getAllOnesConstant(DL, VT); + if (DAG.isKnownToBeAPowerOfTwo(N1)) { + // fold (urem x, pow2) -> (and x, pow2-1) + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); + } + if (N1.getOpcode() == ISD::SHL && + DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) { + // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); + } + } + + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + + // If X/C can be simplified by the division-by-constant logic, lower + // X%C to the equivalent of X-X/C*C. + // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the + // speculative DIV must not cause a DIVREM conversion. We guard against this + // by skipping the simplification if isIntDivCheap(). When div is not cheap, + // combine will not return a DIVREM. Regardless, checking cheapness here + // makes sense since the simplification results in fatter code. + if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) { + SDValue OptimizedDiv = + isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N); + if (OptimizedDiv.getNode()) { + // If the equivalent Div node also exists, update its users. + unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV; + if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(), + { N0, N1 })) + CombineTo(DivNode, OptimizedDiv); + SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); + AddToWorklist(OptimizedDiv.getNode()); + AddToWorklist(Mul.getNode()); + return Sub; + } + } + + // sdiv, srem -> sdivrem + if (SDValue DivRem = useDivRem(N)) + return DivRem.getValue(1); + + return SDValue(); +} + +SDValue DAGCombiner::visitMULHS(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + if (VT.isVector()) { + // fold (mulhs x, 0) -> 0 + // do not return N0/N1, because undef node may exist. + if (ISD::isBuildVectorAllZeros(N0.getNode()) || + ISD::isBuildVectorAllZeros(N1.getNode())) + return DAG.getConstant(0, DL, VT); + } + + // fold (mulhs x, 0) -> 0 + if (isNullConstant(N1)) + return N1; + // fold (mulhs x, 1) -> (sra x, size(x)-1) + if (isOneConstant(N1)) + return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, + DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL, + getShiftAmountTy(N0.getValueType()))); + + // fold (mulhs x, undef) -> 0 + if (N0.isUndef() || N1.isUndef()) + return DAG.getConstant(0, DL, VT); + + // If the type twice as wide is legal, transform the mulhs to a wider multiply + // plus a shift. 
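+  // Editor's illustration: for i16 mulhs on a target where i32 MUL is legal,
+  // this emits trunc(srl(mul(sext(x), sext(y)), 16)), i.e. the high half of
+  // the 32-bit product.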
+ if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); + N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); + N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(N1.getValueType()))); + return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitMULHU(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + if (VT.isVector()) { + // fold (mulhu x, 0) -> 0 + // do not return N0/N1, because undef node may exist. + if (ISD::isBuildVectorAllZeros(N0.getNode()) || + ISD::isBuildVectorAllZeros(N1.getNode())) + return DAG.getConstant(0, DL, VT); + } + + // fold (mulhu x, 0) -> 0 + if (isNullConstant(N1)) + return N1; + // fold (mulhu x, 1) -> 0 + if (isOneConstant(N1)) + return DAG.getConstant(0, DL, N0.getValueType()); + // fold (mulhu x, undef) -> 0 + if (N0.isUndef() || N1.isUndef()) + return DAG.getConstant(0, DL, VT); + + // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c) + if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && + DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) { + unsigned NumEltBits = VT.getScalarSizeInBits(); + SDValue LogBase2 = BuildLogBase2(N1, DL); + SDValue SRLAmt = DAG.getNode( + ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2); + EVT ShiftVT = getShiftAmountTy(N0.getValueType()); + SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT); + return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc); + } + + // If the type twice as wide is legal, transform the mulhu to a wider multiply + // plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); + N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); + N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); + N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(N1.getValueType()))); + return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); + } + } + + return SDValue(); +} + +/// Perform optimizations common to nodes that compute two values. LoOp and HiOp +/// give the opcodes for the two computations that are being performed. Return +/// true if a simplification was made. +SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, + unsigned HiOp) { + // If the high half is not needed, just compute the low half. + bool HiExists = N->hasAnyUseOfValue(1); + if (!HiExists && (!LegalOperations || + TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { + SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); + return CombineTo(N, Res, Res); + } + + // If the low half is not needed, just compute the high half. + bool LoExists = N->hasAnyUseOfValue(0); + if (!LoExists && (!LegalOperations || + TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) { + SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); + return CombineTo(N, Res, Res); + } + + // If both halves are used, return as it is. 
+  if (LoExists && HiExists)
+    return SDValue();
+
+  // If the two computed results can be simplified separately, separate them.
+  if (LoExists) {
+    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
+    AddToWorklist(Lo.getNode());
+    SDValue LoOpt = combine(Lo.getNode());
+    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
+        (!LegalOperations ||
+         TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
+      return CombineTo(N, LoOpt, LoOpt);
+  }
+
+  if (HiExists) {
+    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
+    AddToWorklist(Hi.getNode());
+    SDValue HiOpt = combine(Hi.getNode());
+    if (HiOpt.getNode() && HiOpt != Hi &&
+        (!LegalOperations ||
+         TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
+      return CombineTo(N, HiOpt, HiOpt);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
+  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
+    return Res;
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+
+  // If the type twice as wide is legal, transform the smul_lohi to a wider
+  // multiply plus a shift.
+  if (VT.isSimple() && !VT.isVector()) {
+    MVT Simple = VT.getSimpleVT();
+    unsigned SimpleSize = Simple.getSizeInBits();
+    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
+      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
+      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+      // Compute the high part as N1.
+      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+                       DAG.getConstant(SimpleSize, DL,
+                                       getShiftAmountTy(Lo.getValueType())));
+      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+      // Compute the low part as N0.
+      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+      return CombineTo(N, Lo, Hi);
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
+  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
+    return Res;
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+
+  // (umul_lohi N0, 0) -> (0, 0)
+  if (isNullConstant(N->getOperand(1))) {
+    SDValue Zero = DAG.getConstant(0, DL, VT);
+    return CombineTo(N, Zero, Zero);
+  }
+
+  // (umul_lohi N0, 1) -> (N0, 0)
+  if (isOneConstant(N->getOperand(1))) {
+    SDValue Zero = DAG.getConstant(0, DL, VT);
+    return CombineTo(N, N->getOperand(0), Zero);
+  }
+
+  // If the type twice as wide is legal, transform the umul_lohi to a wider
+  // multiply plus a shift.
+  if (VT.isSimple() && !VT.isVector()) {
+    MVT Simple = VT.getSimpleVT();
+    unsigned SimpleSize = Simple.getSizeInBits();
+    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
+      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
+      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+      // Compute the high part as N1.
+      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+                       DAG.getConstant(SimpleSize, DL,
+                                       getShiftAmountTy(Lo.getValueType())));
+      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+      // Compute the low part as N0.
+      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+      return CombineTo(N, Lo, Hi);
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitMULO(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N0.getValueType();
+  bool IsSigned = (ISD::SMULO == N->getOpcode());
+
+  EVT CarryVT = N->getValueType(1);
+  SDLoc DL(N);
+
+  // canonicalize constant to RHS.
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
+
+  // fold (mulo x, 0) -> 0 + no carry out
+  if (isNullOrNullSplat(N1))
+    return CombineTo(N, DAG.getConstant(0, DL, VT),
+                     DAG.getConstant(0, DL, CarryVT));
+
+  // (mulo x, 2) -> (addo x, x)
+  if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
+    if (C2->getAPIntValue() == 2)
+      return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
+                         N->getVTList(), N0, N0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N0.getValueType();
+
+  // fold vector ops
+  if (VT.isVector())
+    if (SDValue FoldedVOp = SimplifyVBinOp(N))
+      return FoldedVOp;
+
+  // fold operation with constant operands.
+  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
+
+  // canonicalize constant to RHS
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+
+  // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
+  // Only do this if the current op isn't legal and the flipped is.
+  unsigned Opcode = N->getOpcode();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isOperationLegal(Opcode, VT) &&
+      (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
+      (N1.isUndef() || DAG.SignBitIsZero(N1))) {
+    unsigned AltOpcode;
+    switch (Opcode) {
+    case ISD::SMIN: AltOpcode = ISD::UMIN; break;
+    case ISD::SMAX: AltOpcode = ISD::UMAX; break;
+    case ISD::UMIN: AltOpcode = ISD::SMIN; break;
+    case ISD::UMAX: AltOpcode = ISD::SMAX; break;
+    default: llvm_unreachable("Unknown MINMAX opcode");
+    }
+    if (TLI.isOperationLegal(AltOpcode, VT))
+      return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
+  }
+
+  return SDValue();
+}
+
+/// If this is a bitwise logic instruction and both operands have the same
+/// opcode, try to sink the other opcode after the logic instruction.
+SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
+  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+  EVT VT = N0.getValueType();
+  unsigned LogicOpcode = N->getOpcode();
+  unsigned HandOpcode = N0.getOpcode();
+  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
+          LogicOpcode == ISD::XOR) && "Expected logic opcode");
+  assert(HandOpcode == N1.getOpcode() && "Bad input!");
+
+  // Bail early if none of these transforms apply.
+  if (N0.getNumOperands() == 0)
+    return SDValue();
+
+  // FIXME: We should check number of uses of the operands to not increase
+  // the instruction count for all transforms.
+
+  // Handle size-changing casts.
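+  // For example (editor's note):
+  //   (and (zext i8 X to i32), (zext i8 Y to i32))
+  //     --> (zext (and i8 X, Y) to i32)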
+  SDValue X = N0.getOperand(0);
+  SDValue Y = N1.getOperand(0);
+  EVT XVT = X.getValueType();
+  SDLoc DL(N);
+  if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
+      HandOpcode == ISD::SIGN_EXTEND) {
+    // If both operands have other uses, this transform would create extra
+    // instructions without eliminating anything.
+    if (!N0.hasOneUse() && !N1.hasOneUse())
+      return SDValue();
+    // We need matching integer source types.
+    if (XVT != Y.getValueType())
+      return SDValue();
+    // Don't create an illegal op during or after legalization. Don't ever
+    // create an unsupported vector op.
+    if ((VT.isVector() || LegalOperations) &&
+        !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
+      return SDValue();
+    // Avoid infinite looping with PromoteIntBinOp.
+    // TODO: Should we apply desirable/legal constraints to all opcodes?
+    if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
+        !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
+      return SDValue();
+    // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
+    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+    return DAG.getNode(HandOpcode, DL, VT, Logic);
+  }
+
+  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
+  if (HandOpcode == ISD::TRUNCATE) {
+    // If both operands have other uses, this transform would create extra
+    // instructions without eliminating anything.
+    if (!N0.hasOneUse() && !N1.hasOneUse())
+      return SDValue();
+    // We need matching source types.
+    if (XVT != Y.getValueType())
+      return SDValue();
+    // Don't create an illegal op during or after legalization.
+    if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
+      return SDValue();
+    // Be extra careful sinking truncate. If it's free, there's no benefit in
+    // widening a binop. Also, don't create a logic op on an illegal type.
+    if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
+      return SDValue();
+    if (!TLI.isTypeLegal(XVT))
+      return SDValue();
+    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+    return DAG.getNode(HandOpcode, DL, VT, Logic);
+  }
+
+  // For binops SHL/SRL/SRA/AND:
+  //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
+  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
+       HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
+      N0.getOperand(1) == N1.getOperand(1)) {
+    // If either operand has other uses, this transform is not an improvement.
+    if (!N0.hasOneUse() || !N1.hasOneUse())
+      return SDValue();
+    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+    return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
+  }
+
+  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
+  if (HandOpcode == ISD::BSWAP) {
+    // If either operand has other uses, this transform is not an improvement.
+    if (!N0.hasOneUse() || !N1.hasOneUse())
+      return SDValue();
+    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+    return DAG.getNode(HandOpcode, DL, VT, Logic);
+  }
+
+  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
+  // Only perform this optimization up until type legalization, before
+  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
+  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
+  // we don't want to undo this promotion.
+  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
+  // on scalars.
+  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
+      Level <= AfterLegalizeTypes) {
+    // Input types must be integer and the same.
+ if (XVT.isInteger() && XVT == Y.getValueType() && + !(VT.isVector() && TLI.isTypeLegal(VT) && + !XVT.isVector() && !TLI.isTypeLegal(XVT))) { + SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); + return DAG.getNode(HandOpcode, DL, VT, Logic); + } + } + + // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). + // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) + // If both shuffles use the same mask, and both shuffle within a single + // vector, then it is worthwhile to move the swizzle after the operation. + // The type-legalizer generates this pattern when loading illegal + // vector types from memory. In many cases this allows additional shuffle + // optimizations. + // There are other cases where moving the shuffle after the xor/and/or + // is profitable even if shuffles don't perform a swizzle. + // If both shuffles use the same mask, and both shuffles have the same first + // or second operand, then it might still be profitable to move the shuffle + // after the xor/and/or operation. + if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { + auto *SVN0 = cast<ShuffleVectorSDNode>(N0); + auto *SVN1 = cast<ShuffleVectorSDNode>(N1); + assert(X.getValueType() == Y.getValueType() && + "Inputs to shuffles are not the same type"); + + // Check that both shuffles use the same mask. The masks are known to be of + // the same length because the result vector type is the same. + // Check also that shuffles have only one use to avoid introducing extra + // instructions. + if (!SVN0->hasOneUse() || !SVN1->hasOneUse() || + !SVN0->getMask().equals(SVN1->getMask())) + return SDValue(); + + // Don't try to fold this node if it requires introducing a + // build vector of all zeros that might be illegal at this stage. + SDValue ShOp = N0.getOperand(1); + if (LogicOpcode == ISD::XOR && !ShOp.isUndef()) + ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); + + // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C) + if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { + SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, + N0.getOperand(0), N1.getOperand(0)); + return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask()); + } + + // Don't try to fold this node if it requires introducing a + // build vector of all zeros that might be illegal at this stage. + ShOp = N0.getOperand(0); + if (LogicOpcode == ISD::XOR && !ShOp.isUndef()) + ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); + + // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B)) + if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) { + SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1), + N1.getOperand(1)); + return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask()); + } + } + + return SDValue(); +} + +/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient. +SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, + const SDLoc &DL) { + SDValue LL, LR, RL, RR, N0CC, N1CC; + if (!isSetCCEquivalent(N0, LL, LR, N0CC) || + !isSetCCEquivalent(N1, RL, RR, N1CC)) + return SDValue(); + + assert(N0.getValueType() == N1.getValueType() && + "Unexpected operand types for bitwise logic op"); + assert(LL.getValueType() == LR.getValueType() && + RL.getValueType() == RR.getValueType() && + "Unexpected operand types for setcc"); + + // If we're here post-legalization or the logic op type is not i1, the logic + // op type must match a setcc result type. 
Also, all folds require new + // operations on the left and right operands, so those types must match. + EVT VT = N0.getValueType(); + EVT OpVT = LL.getValueType(); + if (LegalOperations || VT.getScalarType() != MVT::i1) + if (VT != getSetCCResultType(OpVT)) + return SDValue(); + if (OpVT != RL.getValueType()) + return SDValue(); + + ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get(); + ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get(); + bool IsInteger = OpVT.isInteger(); + if (LR == RR && CC0 == CC1 && IsInteger) { + bool IsZero = isNullOrNullSplat(LR); + bool IsNeg1 = isAllOnesOrAllOnesSplat(LR); + + // All bits clear? + bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero; + // All sign bits clear? + bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1; + // Any bits set? + bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero; + // Any sign bits set? + bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero; + + // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0) + // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1) + // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0) + // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0) + if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) { + SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL); + AddToWorklist(Or.getNode()); + return DAG.getSetCC(DL, VT, Or, LR, CC1); + } + + // All bits set? + bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1; + // All sign bits set? + bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero; + // Any bits clear? + bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1; + // Any sign bits clear? + bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1; + + // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1) + // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0) + // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1) + // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1) + if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) { + SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL); + AddToWorklist(And.getNode()); + return DAG.getSetCC(DL, VT, And, LR, CC1); + } + } + + // TODO: What is the 'or' equivalent of this fold? + // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2) + if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 && + IsInteger && CC0 == ISD::SETNE && + ((isNullConstant(LR) && isAllOnesConstant(RR)) || + (isAllOnesConstant(LR) && isNullConstant(RR)))) { + SDValue One = DAG.getConstant(1, DL, OpVT); + SDValue Two = DAG.getConstant(2, DL, OpVT); + SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One); + AddToWorklist(Add.getNode()); + return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE); + } + + // Try more general transforms if the predicates match and the only user of + // the compares is the 'and' or 'or'. 
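+  // Editor's illustration of the one-bit-difference fold below:
+  //   and (setne X, 4), (setne X, 5) --> setne (and (add X, -4), ~1), 0
+  // because 5 - 4 == 1 is a power of two, so X is in {4, 5} iff
+  // (X - 4) & ~1 == 0.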
+ if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 && + N0.hasOneUse() && N1.hasOneUse()) { + // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 + // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0 + if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) { + SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR); + SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR); + SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR); + SDValue Zero = DAG.getConstant(0, DL, OpVT); + return DAG.getSetCC(DL, VT, Or, Zero, CC1); + } + + // Turn compare of constants whose difference is 1 bit into add+and+setcc. + // TODO - support non-uniform vector amounts. + if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) { + // Match a shared variable operand and 2 non-opaque constant operands. + ConstantSDNode *C0 = isConstOrConstSplat(LR); + ConstantSDNode *C1 = isConstOrConstSplat(RR); + if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) { + // Canonicalize larger constant as C0. + if (C1->getAPIntValue().ugt(C0->getAPIntValue())) + std::swap(C0, C1); + + // The difference of the constants must be a single bit. + const APInt &C0Val = C0->getAPIntValue(); + const APInt &C1Val = C1->getAPIntValue(); + if ((C0Val - C1Val).isPowerOf2()) { + // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) --> + // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq + SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT); + SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC); + SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT); + SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC); + SDValue Zero = DAG.getConstant(0, DL, OpVT); + return DAG.getSetCC(DL, VT, And, Zero, CC0); + } + } + } + } + + // Canonicalize equivalent operands to LL == RL. + if (LL == RR && LR == RL) { + CC1 = ISD::getSetCCSwappedOperands(CC1); + std::swap(RL, RR); + } + + // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC) + // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC) + if (LL == RL && LR == RR) { + ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger) + : ISD::getSetCCOrOperation(CC0, CC1, IsInteger); + if (NewCC != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, OpVT)))) + return DAG.getSetCC(DL, VT, LL, LR, NewCC); + } + + return SDValue(); +} + +/// This contains all DAGCombine rules which reduce two values combined by +/// an And operation to a single value. This makes them reusable in the context +/// of visitSELECT(). Rules involving constants are not included as +/// visitSELECT() already handles those cases. +SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { + EVT VT = N1.getValueType(); + SDLoc DL(N); + + // fold (and x, undef) -> 0 + if (N0.isUndef() || N1.isUndef()) + return DAG.getConstant(0, DL, VT); + + if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL)) + return V; + + if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && + VT.getSizeInBits() <= 64) { + if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { + // Look for (and (add x, c1), (lshr y, c2)). 
If C1 wasn't a legal
+        // immediate for an add, but it is legal if its top c2 bits are set,
+        // transform the ADD so the immediate doesn't need to be materialized
+        // in a register.
+        APInt ADDC = ADDI->getAPIntValue();
+        APInt SRLC = SRLI->getAPIntValue();
+        if (ADDC.getMinSignedBits() <= 64 &&
+            SRLC.ult(VT.getSizeInBits()) &&
+            !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+          APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+                                             SRLC.getZExtValue());
+          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
+            ADDC |= Mask;
+            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+              SDLoc DL0(N0);
+              SDValue NewAdd =
+                DAG.getNode(ISD::ADD, DL0, VT,
+                            N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
+              CombineTo(N0.getNode(), NewAdd);
+              // Return N so it doesn't get rechecked!
+              return SDValue(N, 0);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // Reduce bit extract of low half of an integer to the narrower type.
+  // (and (srl i64:x, K), KMask) ->
+  //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
+  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+    if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
+      if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+        unsigned Size = VT.getSizeInBits();
+        const APInt &AndMask = CAnd->getAPIntValue();
+        unsigned ShiftBits = CShift->getZExtValue();
+
+        // Bail out, this node will probably disappear anyway.
+        if (ShiftBits == 0)
+          return SDValue();
+
+        unsigned MaskBits = AndMask.countTrailingOnes();
+        EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
+
+        if (AndMask.isMask() &&
+            // Required bits must not span the two halves of the integer and
+            // must fit in the half size type.
+            (ShiftBits + MaskBits <= Size / 2) &&
+            TLI.isNarrowingProfitable(VT, HalfVT) &&
+            TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
+            TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
+            TLI.isTruncateFree(VT, HalfVT) &&
+            TLI.isZExtFree(HalfVT, VT)) {
+          // The isNarrowingProfitable is to avoid regressions on PPC and
+          // AArch64 which match a few 64-bit bit insert / bit extract patterns
+          // on downstream users of this. Those patterns could probably be
+          // extended to handle extensions mixed in.
+
+          SDLoc SL(N0);
+          assert(MaskBits <= Size);
+
+          // Extracting the highest bit of the low half.
+          EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
+          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
+                                      N0.getOperand(0));
+
+          SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
+          SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
+          SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
+          SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
+          return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
+        }
+      }
+    }
+  }
+
+  return SDValue();
+}
+
+bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
+                                   EVT LoadResultTy, EVT &ExtVT) {
+  if (!AndC->getAPIntValue().isMask())
+    return false;
+
+  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
+
+  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+  EVT LoadedVT = LoadN->getMemoryVT();
+
+  if (ExtVT == LoadedVT &&
+      (!LegalOperations ||
+       TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
+    // ZEXTLOAD will match without needing to change the size of the value
+    // being loaded.
+    return true;
+  }
+
+  // Do not change the width of volatile or atomic loads.
+  if (!LoadN->isSimple())
+    return false;
+
+  // Do not generate loads of non-round integer types since these can
+  // be expensive (and would be wrong if the type is not byte sized).
+  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
+    return false;
+
+  if (LegalOperations &&
+      !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
+    return false;
+
+  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
+    return false;
+
+  return true;
+}
+
+bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
+                                    ISD::LoadExtType ExtType, EVT &MemVT,
+                                    unsigned ShAmt) {
+  if (!LDST)
+    return false;
+  // Only allow byte offsets.
+  if (ShAmt % 8)
+    return false;
+
+  // Do not generate loads of non-round integer types since these can
+  // be expensive (and would be wrong if the type is not byte sized).
+  if (!MemVT.isRound())
+    return false;
+
+  // Don't change the width of volatile or atomic loads.
+  if (!LDST->isSimple())
+    return false;
+
+  // Verify that we are actually reducing a load width here.
+  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
+    return false;
+
+  // Ensure that this isn't going to produce an unsupported memory access.
+  if (ShAmt &&
+      !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+                              LDST->getAddressSpace(), ShAmt / 8,
+                              LDST->getMemOperand()->getFlags()))
+    return false;
+
+  // It's not possible to generate a constant of extended or untyped type.
+  EVT PtrType = LDST->getBasePtr().getValueType();
+  if (PtrType == MVT::Untyped || PtrType.isExtended())
+    return false;
+
+  if (isa<LoadSDNode>(LDST)) {
+    LoadSDNode *Load = cast<LoadSDNode>(LDST);
+    // Don't transform one with multiple uses, this would require adding a new
+    // load.
+    if (!SDValue(Load, 0).hasOneUse())
+      return false;
+
+    if (LegalOperations &&
+        !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
+      return false;
+
+    // For the transform to be legal, the load must produce only two values
+    // (the value loaded and the chain). Don't transform a pre-increment
+    // load, for example, which produces an extra value. Otherwise the
+    // transformation is not equivalent, and the downstream logic to replace
+    // uses gets things wrong.
+    if (Load->getNumValues() > 2)
+      return false;
+
+    // If the load that we're shrinking is an extload and we're not just
+    // discarding the extension we can't simply shrink the load. Bail.
+    // TODO: It would be possible to merge the extensions in some cases.
+    if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
+        Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
+      return false;
+
+    if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
+      return false;
+  } else {
+    assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
+    StoreSDNode *Store = cast<StoreSDNode>(LDST);
+    // Can't write outside the original store.
+    if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
+      return false;
+
+    if (LegalOperations &&
+        !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
+      return false;
+  }
+  return true;
+}
+
+bool DAGCombiner::SearchForAndLoads(SDNode *N,
+                                    SmallVectorImpl<LoadSDNode*> &Loads,
+                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
+                                    ConstantSDNode *Mask,
+                                    SDNode *&NodeToMask) {
+  // Recursively search for the operands, looking for loads which can be
+  // narrowed.
+  for (SDValue Op : N->op_values()) {
+    if (Op.getValueType().isVector())
+      return false;
+
+    // Some constants may need fixing up later if they are too large.
+    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
+          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
+        NodesWithConsts.insert(N);
+      continue;
+    }
+
+    if (!Op.hasOneUse())
+      return false;
+
+    switch (Op.getOpcode()) {
+    case ISD::LOAD: {
+      auto *Load = cast<LoadSDNode>(Op);
+      EVT ExtVT;
+      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
+          isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
+
+        // ZEXTLOAD is already small enough.
+        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
+            ExtVT.bitsGE(Load->getMemoryVT()))
+          continue;
+
+        // Use LE to convert equal sized loads to zext.
+        if (ExtVT.bitsLE(Load->getMemoryVT()))
+          Loads.push_back(Load);
+
+        continue;
+      }
+      return false;
+    }
+    case ISD::ZERO_EXTEND:
+    case ISD::AssertZext: {
+      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
+      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+      EVT VT = Op.getOpcode() == ISD::AssertZext ?
+        cast<VTSDNode>(Op.getOperand(1))->getVT() :
+        Op.getOperand(0).getValueType();
+
+      // We can accept extending nodes if the mask is wider or an equal
+      // width to the original type.
+      if (ExtVT.bitsGE(VT))
+        continue;
+      break;
+    }
+    case ISD::OR:
+    case ISD::XOR:
+    case ISD::AND:
+      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
+                             NodeToMask))
+        return false;
+      continue;
+    }
+
+    // Allow one node which will be masked along with any loads found.
+    if (NodeToMask)
+      return false;
+
+    // Also ensure that the node to be masked only produces one data result.
+    NodeToMask = Op.getNode();
+    if (NodeToMask->getNumValues() > 1) {
+      bool HasValue = false;
+      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
+        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
+        if (VT != MVT::Glue && VT != MVT::Other) {
+          if (HasValue) {
+            NodeToMask = nullptr;
+            return false;
+          }
+          HasValue = true;
+        }
+      }
+      assert(HasValue && "Node to be masked has no data result?");
+    }
+  }
+  return true;
+}
+
+bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
+  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!Mask)
+    return false;
+
+  if (!Mask->getAPIntValue().isMask())
+    return false;
+
+  // No need to do anything if the and directly uses a load.
+  if (isa<LoadSDNode>(N->getOperand(0)))
+    return false;
+
+  SmallVector<LoadSDNode*, 8> Loads;
+  SmallPtrSet<SDNode*, 2> NodesWithConsts;
+  SDNode *FixupNode = nullptr;
+  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
+    if (Loads.size() == 0)
+      return false;
+
+    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
+    SDValue MaskOp = N->getOperand(1);
+
+    // If it exists, fixup the single node we allow in the tree that needs
+    // masking.
+    if (FixupNode) {
+      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
+      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
+                                FixupNode->getValueType(0),
+                                SDValue(FixupNode, 0), MaskOp);
+      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
+      if (And.getOpcode() == ISD::AND)
+        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
+    }
+
+    // Narrow any constants that need it.
+    for (auto *LogicN : NodesWithConsts) {
+      SDValue Op0 = LogicN->getOperand(0);
+      SDValue Op1 = LogicN->getOperand(1);
+
+      if (isa<ConstantSDNode>(Op0))
+        std::swap(Op0, Op1);
+
+      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
+                                Op1, MaskOp);
+
+      DAG.UpdateNodeOperands(LogicN, Op0, And);
+    }
+
+    // Create narrow loads.
+    for (auto *Load : Loads) {
+      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
+      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
+                                SDValue(Load, 0), MaskOp);
+      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
+      if (And.getOpcode() == ISD::AND)
+        And = SDValue(
+            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
+      SDValue NewLoad = ReduceLoadWidth(And.getNode());
+      assert(NewLoad &&
+             "Shouldn't be masking the load if it can't be narrowed");
+      CombineTo(Load, NewLoad, NewLoad.getValue(1));
+    }
+    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
+    return true;
+  }
+  return false;
+}
+
+// Unfold
+//    x &  (-1 'logical shift' y)
+// To
+//    (x 'opposite logical shift' y) 'logical shift' y
+// if it is better for performance.
+SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
+  assert(N->getOpcode() == ISD::AND);
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // Do we actually prefer shifts over a mask?
+  if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
+    return SDValue();
+
+  // Try to match  (-1 '[outer] logical shift' y)
+  unsigned OuterShift;
+  unsigned InnerShift; // The opposite direction to the OuterShift.
+  SDValue Y;           // Shift amount.
+  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
+    if (!M.hasOneUse())
+      return false;
+    OuterShift = M->getOpcode();
+    if (OuterShift == ISD::SHL)
+      InnerShift = ISD::SRL;
+    else if (OuterShift == ISD::SRL)
+      InnerShift = ISD::SHL;
+    else
+      return false;
+    if (!isAllOnesConstant(M->getOperand(0)))
+      return false;
+    Y = M->getOperand(1);
+    return true;
+  };
+
+  SDValue X;
+  if (matchMask(N1))
+    X = N0;
+  else if (matchMask(N0))
+    X = N1;
+  else
+    return SDValue();
+
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  //     tmp = x 'opposite logical shift' y
+  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
+  //     ret = tmp 'logical shift' y
+  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
+
+  return T1;
+}
+
+/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
+/// For a target with a bit test, this is expected to become test + set and save
+/// at least 1 instruction.
+static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
+  assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
+
+  // This is probably not worthwhile without a supported type.
+  EVT VT = And->getValueType(0);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isTypeLegal(VT))
+    return SDValue();
+
+  // Look through an optional extension and find a 'not'.
+  // TODO: Should we favor test+set even without the 'not' op?
+  SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
+  if (Not.getOpcode() == ISD::ANY_EXTEND)
+    Not = Not.getOperand(0);
+  if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
+    return SDValue();
+
+  // Look through an optional truncation. The source operand may not be the
+  // same type as the original 'and', but that is ok because we are masking
+  // off everything but the low bit.
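+  // e.g. with i64 X truncated to i32, (and (not (trunc (srl X, 5))), 1)
+  // still tests bit 5 of X, as long as the shift amount is below the result
+  // bit width (checked below).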
+ SDValue Srl = Not.getOperand(0); + if (Srl.getOpcode() == ISD::TRUNCATE) + Srl = Srl.getOperand(0); + + // Match a shift-right by constant. + if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() || + !isa<ConstantSDNode>(Srl.getOperand(1))) + return SDValue(); + + // We might have looked through casts that make this transform invalid. + // TODO: If the source type is wider than the result type, do the mask and + // compare in the source type. + const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1); + unsigned VTBitWidth = VT.getSizeInBits(); + if (ShiftAmt.uge(VTBitWidth)) + return SDValue(); + + // Turn this into a bit-test pattern using mask op + setcc: + // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0 + SDLoc DL(And); + SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT); + EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue Mask = DAG.getConstant( + APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT); + SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask); + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ); + return DAG.getZExtOrTrunc(Setcc, DL, VT); +} + +SDValue DAGCombiner::visitAND(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N1.getValueType(); + + // x & x --> x + if (N0 == N1) + return N0; + + // fold vector ops + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (and x, 0) -> 0, vector edition + if (ISD::isBuildVectorAllZeros(N0.getNode())) + // do not return N0, because undef node may exist in N0 + return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()), + SDLoc(N), N0.getValueType()); + if (ISD::isBuildVectorAllZeros(N1.getNode())) + // do not return N1, because undef node may exist in N1 + return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()), + SDLoc(N), N1.getValueType()); + + // fold (and x, -1) -> x, vector edition + if (ISD::isBuildVectorAllOnes(N0.getNode())) + return N1; + if (ISD::isBuildVectorAllOnes(N1.getNode())) + return N0; + } + + // fold (and c1, c2) -> c1&c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); + // canonicalize constant to RHS + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); + // fold (and x, -1) -> x + if (isAllOnesConstant(N1)) + return N0; + // if (and x, c) is known to be zero, return 0 + unsigned BitWidth = VT.getScalarSizeInBits(); + if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(BitWidth))) + return DAG.getConstant(0, SDLoc(N), VT); + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + + // reassociate and + if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags())) + return RAND; + + // Try to convert a constant mask AND into a shuffle clear mask. 
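+  // e.g. (and x:v4i32, <-1, 0, -1, 0>) can become a shuffle of x with a
+  // zero vector, using mask <0, 5, 2, 7>, when such a shuffle is legal for
+  // the target.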
+  if (VT.isVector())
+    if (SDValue Shuffle = XformToShuffleWithZero(N))
+      return Shuffle;
+
+  // fold (and (or x, C), D) -> D if (C & D) == D
+  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
+  };
+  if (N0.getOpcode() == ISD::OR &&
+      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
+    return N1;
+  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
+  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+    SDValue N0Op0 = N0.getOperand(0);
+    APInt Mask = ~N1C->getAPIntValue();
+    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
+    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
+      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
+                                 N0.getValueType(), N0Op0);
+
+      // Replace uses of the AND with uses of the Zero extend node.
+      CombineTo(N, Zext);
+
+      // We actually want to replace all uses of the any_extend with the
+      // zero_extend, to avoid duplicating things. This will later cause this
+      // AND to be folded.
+      CombineTo(N0.getNode(), Zext);
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+
+  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
+  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
+  // already be zero by virtue of the width of the base type of the load.
+  //
+  // the 'X' node here can either be nothing or an extract_vector_elt to catch
+  // more cases.
+  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
+       N0.getOperand(0).getOpcode() == ISD::LOAD &&
+       N0.getOperand(0).getResNo() == 0) ||
+      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
+    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
+                                         N0 : N0.getOperand(0) );
+
+    // Get the constant (if applicable) the zeroth operand is being ANDed
+    // with. This can be a pure constant or a vector splat, in which case we
+    // treat the vector as a scalar and use the splat value.
+    APInt Constant = APInt::getNullValue(1);
+    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+      Constant = C->getAPIntValue();
+    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
+      APInt SplatValue, SplatUndef;
+      unsigned SplatBitSize;
+      bool HasAnyUndefs;
+      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
+                                             SplatBitSize, HasAnyUndefs);
+      if (IsSplat) {
+        // Undef bits can contribute to a possible optimisation if set, so
+        // set them.
+        SplatValue |= SplatUndef;
+
+        // The splat value may be something like "0x00FFFFFF", which means 0
+        // for the first vector value and FF for the rest, repeating. We need
+        // a mask that will apply equally to all members of the vector, so AND
+        // all the lanes of the constant together.
+        unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
+
+        // If the splat value has been compressed to a bitlength lower
+        // than the size of the vector lane, we need to re-expand it to
+        // the lane size.
+        if (EltBitWidth > SplatBitSize)
+          for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
+               SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
+            SplatValue |= SplatValue.shl(SplatBitSize);
+
+        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is
+        // a multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong
+        // value.
+ if ((SplatBitSize % EltBitWidth) == 0) { + Constant = APInt::getAllOnesValue(EltBitWidth); + for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i) + Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth); + } + } + } + + // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is + // actually legal and isn't going to get expanded, else this is a false + // optimisation. + bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, + Load->getValueType(0), + Load->getMemoryVT()); + + // Resize the constant to the same size as the original memory access before + // extension. If it is still the AllOnesValue then this AND is completely + // unneeded. + Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits()); + + bool B; + switch (Load->getExtensionType()) { + default: B = false; break; + case ISD::EXTLOAD: B = CanZextLoadProfitably; break; + case ISD::ZEXTLOAD: + case ISD::NON_EXTLOAD: B = true; break; + } + + if (B && Constant.isAllOnesValue()) { + // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to + // preserve semantics once we get rid of the AND. + SDValue NewLoad(Load, 0); + + // Fold the AND away. NewLoad may get replaced immediately. + CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); + + if (Load->getExtensionType() == ISD::EXTLOAD) { + NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, + Load->getValueType(0), SDLoc(Load), + Load->getChain(), Load->getBasePtr(), + Load->getOffset(), Load->getMemoryVT(), + Load->getMemOperand()); + // Replace uses of the EXTLOAD with the new ZEXTLOAD. + if (Load->getNumValues() == 3) { + // PRE/POST_INC loads have 3 values. + SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), + NewLoad.getValue(2) }; + CombineTo(Load, To, 3, true); + } else { + CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); + } + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (and (load x), 255) -> (zextload x, i8) + // fold (and (extload x, i16), 255) -> (zextload x, i8) + // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) + if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD || + (N0.getOpcode() == ISD::ANY_EXTEND && + N0.getOperand(0).getOpcode() == ISD::LOAD))) { + if (SDValue Res = ReduceLoadWidth(N)) { + LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND + ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0); + AddToWorklist(N); + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res); + return SDValue(N, 0); + } + } + + if (Level >= AfterLegalizeTypes) { + // Attempt to propagate the AND back up to the leaves which, if they're + // loads, can be combined to narrow loads and the AND node can be removed. + // Perform after legalization so that extend nodes will already be + // combined into the loads. + if (BackwardsPropagateMask(N, DAG)) { + return SDValue(N, 0); + } + } + + if (SDValue Combined = visitANDLike(N0, N1, N)) + return Combined; + + // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) + return V; + + // Masking the negated extension of a boolean is just the zero-extended + // boolean: + // and (sub 0, zext(bool X)), 1 --> zext(bool X) + // and (sub 0, sext(bool X)), 1 --> zext(bool X) + // + // Note: the SimplifyDemandedBits fold below can make an information-losing + // transform, and then we have no way to find this better fold. 
+ if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) { + if (isNullOrNullSplat(N0.getOperand(0))) { + SDValue SubRHS = N0.getOperand(1); + if (SubRHS.getOpcode() == ISD::ZERO_EXTEND && + SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) + return SubRHS; + if (SubRHS.getOpcode() == ISD::SIGN_EXTEND && + SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0)); + } + } + + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) + // fold (and (sra)) -> (and (srl)) when possible. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (zext_inreg (extload x)) -> (zextload x) + // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use + if (ISD::isUNINDEXEDLoad(N0.getNode()) && + (ISD::isEXTLoad(N0.getNode()) || + (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned ExtBitSize = N1.getScalarValueSizeInBits(); + unsigned MemBitSize = MemVT.getScalarSizeInBits(); + APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize); + if (DAG.MaskedValueIsZero(N1, ExtBits) && + ((!LegalOperations && LN0->isSimple()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { + SDValue ExtLoad = + DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), + LN0->getBasePtr(), MemVT, LN0->getMemOperand()); + AddToWorklist(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) + if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { + if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false)) + return BSwap; + } + + if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N)) + return Shifts; + + if (TLI.hasBitTest(N0, N1)) + if (SDValue V = combineShiftAnd1ToBitTest(N, DAG)) + return V; + + return SDValue(); +} + +/// Match (a >> 8) | (a << 8) as (bswap a) >> 16. +SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, + bool DemandHighBits) { + if (!LegalOperations) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) + return SDValue(); + if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) + return SDValue(); + + // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff) + bool LookPassAnd0 = false; + bool LookPassAnd1 = false; + if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) + std::swap(N0, N1); + if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL) + std::swap(N0, N1); + if (N0.getOpcode() == ISD::AND) { + if (!N0.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + // Also handle 0xffff since the LHS is guaranteed to have zeros there. + // This is needed for X86. 
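+    // (After the shl by 8, the low 8 bits are known zero, so masking with
+    // 0xFFFF is equivalent to masking with 0xFF00 here.)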
+ if (!N01C || (N01C->getZExtValue() != 0xFF00 && + N01C->getZExtValue() != 0xFFFF)) + return SDValue(); + N0 = N0.getOperand(0); + LookPassAnd0 = true; + } + + if (N1.getOpcode() == ISD::AND) { + if (!N1.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); + if (!N11C || N11C->getZExtValue() != 0xFF) + return SDValue(); + N1 = N1.getOperand(0); + LookPassAnd1 = true; + } + + if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) + std::swap(N0, N1); + if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) + return SDValue(); + if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse()) + return SDValue(); + + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); + if (!N01C || !N11C) + return SDValue(); + if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8) + return SDValue(); + + // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) + SDValue N00 = N0->getOperand(0); + if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) { + if (!N00.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1)); + if (!N001C || N001C->getZExtValue() != 0xFF) + return SDValue(); + N00 = N00.getOperand(0); + LookPassAnd0 = true; + } + + SDValue N10 = N1->getOperand(0); + if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) { + if (!N10.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); + // Also allow 0xFFFF since the bits will be shifted out. This is needed + // for X86. + if (!N101C || (N101C->getZExtValue() != 0xFF00 && + N101C->getZExtValue() != 0xFFFF)) + return SDValue(); + N10 = N10.getOperand(0); + LookPassAnd1 = true; + } + + if (N00 != N10) + return SDValue(); + + // Make sure everything beyond the low halfword gets set to zero since the SRL + // 16 will clear the top bits. + unsigned OpSizeInBits = VT.getSizeInBits(); + if (DemandHighBits && OpSizeInBits > 16) { + // If the left-shift isn't masked out then the only way this is a bswap is + // if all bits beyond the low 8 are 0. In that case the entire pattern + // reduces to a left shift anyway: leave it for other parts of the combiner. + if (!LookPassAnd0) + return SDValue(); + + // However, if the right shift isn't masked out then it might be because + // it's not needed. See if we can spot that too. + if (!LookPassAnd1 && + !DAG.MaskedValueIsZero( + N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16))) + return SDValue(); + } + + SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); + if (OpSizeInBits > 16) { + SDLoc DL(N); + Res = DAG.getNode(ISD::SRL, DL, VT, Res, + DAG.getConstant(OpSizeInBits - 16, DL, + getShiftAmountTy(VT))); + } + return Res; +} + +/// Return true if the specified node is an element that makes up a 32-bit +/// packed halfword byteswap. 
+/// ((x & 0x000000ff) << 8) | +/// ((x & 0x0000ff00) >> 8) | +/// ((x & 0x00ff0000) << 8) | +/// ((x & 0xff000000) >> 8) +static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { + if (!N.getNode()->hasOneUse()) + return false; + + unsigned Opc = N.getOpcode(); + if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) + return false; + + SDValue N0 = N.getOperand(0); + unsigned Opc0 = N0.getOpcode(); + if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL) + return false; + + ConstantSDNode *N1C = nullptr; + // SHL or SRL: look upstream for AND mask operand + if (Opc == ISD::AND) + N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + else if (Opc0 == ISD::AND) + N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!N1C) + return false; + + unsigned MaskByteOffset; + switch (N1C->getZExtValue()) { + default: + return false; + case 0xFF: MaskByteOffset = 0; break; + case 0xFF00: MaskByteOffset = 1; break; + case 0xFFFF: + // In case demanded bits didn't clear the bits that will be shifted out. + // This is needed for X86. + if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) { + MaskByteOffset = 1; + break; + } + return false; + case 0xFF0000: MaskByteOffset = 2; break; + case 0xFF000000: MaskByteOffset = 3; break; + } + + // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). + if (Opc == ISD::AND) { + if (MaskByteOffset == 0 || MaskByteOffset == 2) { + // (x >> 8) & 0xff + // (x >> 8) & 0xff0000 + if (Opc0 != ISD::SRL) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } else { + // (x << 8) & 0xff00 + // (x << 8) & 0xff000000 + if (Opc0 != ISD::SHL) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } + } else if (Opc == ISD::SHL) { + // (x & 0xff) << 8 + // (x & 0xff0000) << 8 + if (MaskByteOffset != 0 && MaskByteOffset != 2) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } else { // Opc == ISD::SRL + // (x & 0xff00) >> 8 + // (x & 0xff000000) >> 8 + if (MaskByteOffset != 1 && MaskByteOffset != 3) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } + + if (Parts[MaskByteOffset]) + return false; + + Parts[MaskByteOffset] = N0.getOperand(0).getNode(); + return true; +} + +// Match 2 elements of a packed halfword bswap. +static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) { + if (N.getOpcode() == ISD::OR) + return isBSwapHWordElement(N.getOperand(0), Parts) && + isBSwapHWordElement(N.getOperand(1), Parts); + + if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) { + ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1)); + if (!C || C->getAPIntValue() != 16) + return false; + Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode(); + return true; + } + + return false; +} + +/// Match a 32-bit packed halfword bswap. 
That is +/// ((x & 0x000000ff) << 8) | +/// ((x & 0x0000ff00) >> 8) | +/// ((x & 0x00ff0000) << 8) | +/// ((x & 0xff000000) >> 8) +/// => (rotl (bswap x), 16) +SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { + if (!LegalOperations) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i32) + return SDValue(); + if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) + return SDValue(); + + // Look for either + // (or (bswaphpair), (bswaphpair)) + // (or (or (bswaphpair), (and)), (and)) + // (or (or (and), (bswaphpair)), (and)) + SDNode *Parts[4] = {}; + + if (isBSwapHWordPair(N0, Parts)) { + // (or (or (and), (and)), (or (and), (and))) + if (!isBSwapHWordPair(N1, Parts)) + return SDValue(); + } else if (N0.getOpcode() == ISD::OR) { + // (or (or (or (and), (and)), (and)), (and)) + if (!isBSwapHWordElement(N1, Parts)) + return SDValue(); + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) && + !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts))) + return SDValue(); + } else + return SDValue(); + + // Make sure the parts are all coming from the same node. + if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) + return SDValue(); + + SDLoc DL(N); + SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, + SDValue(Parts[0], 0)); + + // Result of the bswap should be rotated by 16. If it's not legal, then + // do (x << 16) | (x >> 16). + SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT)); + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) + return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt); + if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) + return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); + return DAG.getNode(ISD::OR, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt), + DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt)); +} + +/// This contains all DAGCombine rules which reduce two values combined by +/// an Or operation to a single value \see visitANDLike(). +SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) { + EVT VT = N1.getValueType(); + SDLoc DL(N); + + // fold (or x, undef) -> -1 + if (!LegalOperations && (N0.isUndef() || N1.isUndef())) + return DAG.getAllOnesConstant(DL, VT); + + if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL)) + return V; + + // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + // We can only do this xform if we know that bits from X that are set in C2 + // but not in C1 are already zero. Likewise for Y. + if (const ConstantSDNode *N0O1C = + getAsNonOpaqueConstant(N0.getOperand(1))) { + if (const ConstantSDNode *N1O1C = + getAsNonOpaqueConstant(N1.getOperand(1))) { + // We can only do this xform if we know that bits from X that are set in + // C2 but not in C1 are already zero. Likewise for Y. 
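+          // e.g. for i16: (or (and X, 0xFF00), (and Y, 0x00FF))
+          //        -> (and (or X, Y), 0xFFFF)
+          // when X's low byte and Y's high byte are already known zero.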
+ const APInt &LHSMask = N0O1C->getAPIntValue(); + const APInt &RHSMask = N1O1C->getAPIntValue(); + + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(0), N1.getOperand(0)); + return DAG.getNode(ISD::AND, DL, VT, X, + DAG.getConstant(LHSMask | RHSMask, DL, VT)); + } + } + } + } + + // (or (and X, M), (and X, N)) -> (and X, (or M, N)) + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(0) == N1.getOperand(0) && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(1), N1.getOperand(1)); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X); + } + + return SDValue(); +} + +/// OR combines for which the commuted variant will be tried as well. +static SDValue visitORCommutative( + SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) { + EVT VT = N0.getValueType(); + if (N0.getOpcode() == ISD::AND) { + // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y) + if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1); + + // fold (or (and (xor Y, -1), X), Y) -> (or X, Y) + if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N1.getValueType(); + + // x | x --> x + if (N0 == N1) + return N0; + + // fold vector ops + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (or x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N1; + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + + // fold (or x, -1) -> -1, vector edition + if (ISD::isBuildVectorAllOnes(N0.getNode())) + // do not return N0, because undef node may exist in N0 + return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType()); + if (ISD::isBuildVectorAllOnes(N1.getNode())) + // do not return N1, because undef node may exist in N1 + return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType()); + + // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) + // Do this only if the resulting shuffle is legal. + if (isa<ShuffleVectorSDNode>(N0) && + isa<ShuffleVectorSDNode>(N1) && + // Avoid folding a node with illegal type. + TLI.isTypeLegal(VT)) { + bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode()); + bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()); + bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); + bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode()); + // Ensure both shuffles have a zero input. + if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) { + assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!"); + assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!"); + const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); + const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); + bool CanFold = true; + int NumElts = VT.getVectorNumElements(); + SmallVector<int, 4> Mask(NumElts); + + for (int i = 0; i != NumElts; ++i) { + int M0 = SV0->getMaskElt(i); + int M1 = SV1->getMaskElt(i); + + // Determine if either index is pointing to a zero vector. 
+          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
+          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
+
+          // If one element is zero and the other side is undef, keep undef.
+          // This also handles the case that both are undef.
+          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
+            Mask[i] = -1;
+            continue;
+          }
+
+          // Make sure only one of the elements is zero.
+          if (M0Zero == M1Zero) {
+            CanFold = false;
+            break;
+          }
+
+          assert((M0 >= 0 || M1 >= 0) && "Undef index!");
+
+          // We have a zero and non-zero element. If the non-zero came from
+          // SV0 make the index an LHS index. If it came from SV1, make it
+          // an RHS index. We need to mod by NumElts because we don't care
+          // which operand it came from in the original shuffles.
+          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
+        }
+
+        if (CanFold) {
+          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
+          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
+
+          SDValue LegalShuffle =
+              TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
+                                          Mask, DAG);
+          if (LegalShuffle)
+            return LegalShuffle;
+        }
+      }
+    }
+  }
+
+  // fold (or c1, c2) -> c1|c2
+  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  if (N0C && N1C && !N1C->isOpaque())
+    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
+  // canonicalize constant to RHS
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
+  // fold (or x, 0) -> x
+  if (isNullConstant(N1))
+    return N0;
+  // fold (or x, -1) -> -1
+  if (isAllOnesConstant(N1))
+    return N1;
+
+  if (SDValue NewSel = foldBinOpIntoSelect(N))
+    return NewSel;
+
+  // fold (or x, c) -> c iff (x & ~c) == 0
+  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
+    return N1;
+
+  if (SDValue Combined = visitORLike(N0, N1, N))
+    return Combined;
+
+  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
+  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
+    return BSwap;
+  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
+    return BSwap;
+
+  // reassociate or
+  if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
+    return ROR;
+
+  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+  // iff (c1 & c2) != 0 or c1/c2 are undef.
+  auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
+    return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
+  };
+  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
+    if (SDValue COR = DAG.FoldConstantArithmetic(
+            ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
+      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
+      AddToWorklist(IOR.getNode());
+      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
+    }
+  }
+
+  if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
+    return Combined;
+  if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
+    return Combined;
+
+  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
+  if (N0.getOpcode() == N1.getOpcode())
+    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
+      return V;
+
+  // See if this is some rotate idiom.
+  if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
+    return Rot;
+
+  if (SDValue Load = MatchLoadCombine(N))
+    return Load;
+
+  // Simplify the operands using demanded-bits information.
+ if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // If OR can be rewritten into ADD, try combines based on ADD. + if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) && + DAG.haveNoCommonBitsSet(N0, N1)) + if (SDValue Combined = visitADDLike(N)) + return Combined; + + return SDValue(); +} + +static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) { + if (Op.getOpcode() == ISD::AND && + DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { + Mask = Op.getOperand(1); + return Op.getOperand(0); + } + return Op; +} + +/// Match "(X shl/srl V1) & V2" where V2 may not be present. +static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, + SDValue &Mask) { + Op = stripConstantMask(DAG, Op, Mask); + if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { + Shift = Op; + return true; + } + return false; +} + +/// Helper function for visitOR to extract the needed side of a rotate idiom +/// from a shl/srl/mul/udiv. This is meant to handle cases where +/// InstCombine merged some outside op with one of the shifts from +/// the rotate pattern. +/// \returns An empty \c SDValue if the needed shift couldn't be extracted. +/// Otherwise, returns an expansion of \p ExtractFrom based on the following +/// patterns: +/// +/// (or (add v v) (shrl v bitwidth-1)): +/// expands (add v v) -> (shl v 1) +/// +/// (or (mul v c0) (shrl (mul v c1) c2)): +/// expands (mul v c0) -> (shl (mul v c1) c3) +/// +/// (or (udiv v c0) (shl (udiv v c1) c2)): +/// expands (udiv v c0) -> (shrl (udiv v c1) c3) +/// +/// (or (shl v c0) (shrl (shl v c1) c2)): +/// expands (shl v c0) -> (shl (shl v c1) c3) +/// +/// (or (shrl v c0) (shl (shrl v c1) c2)): +/// expands (shrl v c0) -> (shrl (shrl v c1) c3) +/// +/// Such that in all cases, c3+c2==bitwidth(op v c1). +static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, + SDValue ExtractFrom, SDValue &Mask, + const SDLoc &DL) { + assert(OppShift && ExtractFrom && "Empty SDValue"); + assert( + (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) && + "Existing shift must be valid as a rotate half"); + + ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask); + + // Value and Type of the shift. + SDValue OppShiftLHS = OppShift.getOperand(0); + EVT ShiftedVT = OppShiftLHS.getValueType(); + + // Amount of the existing shift. + ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); + + // (add v v) -> (shl v 1) + if (OppShift.getOpcode() == ISD::SRL && OppShiftCst && + ExtractFrom.getOpcode() == ISD::ADD && + ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) && + ExtractFrom.getOperand(0) == OppShiftLHS && + OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1) + return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS, + DAG.getShiftAmountConstant(1, ShiftedVT, DL)); + + // Preconditions: + // (or (op0 v c0) (shiftl/r (op0 v c1) c2)) + // + // Find opcode of the needed shift to be extracted from (op0 v c0). + unsigned Opcode = ISD::DELETED_NODE; + bool IsMulOrDiv = false; + // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift + // opcode or its arithmetic (mul or udiv) variant. 
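+  // e.g. for i32, (or (mul v, 64), (srl (mul v, 2), 27)): the mul by 64 is
+  // rewritten as (shl (mul v, 2), 5), since 64/2 == 1<<5 and 5+27 == 32,
+  // which exposes the rotate (rotl (mul v, 2), 5).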
+ auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) { + IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant; + if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift) + return false; + Opcode = NeededShift; + return true; + }; + // op0 must be either the needed shift opcode or the mul/udiv equivalent + // that the needed shift can be extracted from. + if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) && + (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV))) + return SDValue(); + + // op0 must be the same opcode on both sides, have the same LHS argument, + // and produce the same value type. + if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() || + OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) || + ShiftedVT != ExtractFrom.getValueType()) + return SDValue(); + + // Constant mul/udiv/shift amount from the RHS of the shift's LHS op. + ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1)); + // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op. + ConstantSDNode *ExtractFromCst = + isConstOrConstSplat(ExtractFrom.getOperand(1)); + // TODO: We should be able to handle non-uniform constant vectors for these values + // Check that we have constant values. + if (!OppShiftCst || !OppShiftCst->getAPIntValue() || + !OppLHSCst || !OppLHSCst->getAPIntValue() || + !ExtractFromCst || !ExtractFromCst->getAPIntValue()) + return SDValue(); + + // Compute the shift amount we need to extract to complete the rotate. + const unsigned VTWidth = ShiftedVT.getScalarSizeInBits(); + if (OppShiftCst->getAPIntValue().ugt(VTWidth)) + return SDValue(); + APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue(); + // Normalize the bitwidth of the two mul/udiv/shift constant operands. + APInt ExtractFromAmt = ExtractFromCst->getAPIntValue(); + APInt OppLHSAmt = OppLHSCst->getAPIntValue(); + zeroExtendToMatch(ExtractFromAmt, OppLHSAmt); + + // Now try extract the needed shift from the ExtractFrom op and see if the + // result matches up with the existing shift's LHS op. + if (IsMulOrDiv) { + // Op to extract from is a mul or udiv by a constant. + // Check: + // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0 + // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0 + const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(), + NeededShiftAmt.getZExtValue()); + APInt ResultAmt; + APInt Rem; + APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem); + if (Rem != 0 || ResultAmt != OppLHSAmt) + return SDValue(); + } else { + // Op to extract from is a shift by a constant. + // Check: + // c2 - (bitwidth(op0 v c0) - c1) == c0 + if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc( + ExtractFromAmt.getBitWidth())) + return SDValue(); + } + + // Return the expanded shift op that should allow a rotate to be formed. + EVT ShiftVT = OppShift.getOperand(1).getValueType(); + EVT ResVT = ExtractFrom.getValueType(); + SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT); + return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode); +} + +// Return true if we can prove that, whenever Neg and Pos are both in the +// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that +// for two opposing shifts shift1 and shift2 and a value X with OpBits bits: +// +// (or (shift1 X, Neg), (shift2 X, Pos)) +// +// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate +// in direction shift1 by Neg. 
The range [0, EltSize) means that we only need +// to consider shift amounts with defined behavior. +static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, + SelectionDAG &DAG) { + // If EltSize is a power of 2 then: + // + // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) + // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize). + // + // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check + // for the stronger condition: + // + // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A] + // + // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1) + // we can just replace Neg with Neg' for the rest of the function. + // + // In other cases we check for the even stronger condition: + // + // Neg == EltSize - Pos [B] + // + // for all Neg and Pos. Note that the (or ...) then invokes undefined + // behavior if Pos == 0 (and consequently Neg == EltSize). + // + // We could actually use [A] whenever EltSize is a power of 2, but the + // only extra cases that it would match are those uninteresting ones + // where Neg and Pos are never in range at the same time. E.g. for + // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) + // as well as (sub 32, Pos), but: + // + // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) + // + // always invokes undefined behavior for 32-bit X. + // + // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise. + unsigned MaskLoBits = 0; + if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { + if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { + KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0)); + unsigned Bits = Log2_64(EltSize); + if (NegC->getAPIntValue().getActiveBits() <= Bits && + ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) { + Neg = Neg.getOperand(0); + MaskLoBits = Bits; + } + } + } + + // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. + if (Neg.getOpcode() != ISD::SUB) + return false; + ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0)); + if (!NegC) + return false; + SDValue NegOp1 = Neg.getOperand(1); + + // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with + // Pos'. The truncation is redundant for the purpose of the equality. + if (MaskLoBits && Pos.getOpcode() == ISD::AND) { + if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) { + KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0)); + if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits && + ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >= + MaskLoBits)) + Pos = Pos.getOperand(0); + } + } + + // The condition we need is now: + // + // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask + // + // If NegOp1 == Pos then we need: + // + // EltSize & Mask == NegC & Mask + // + // (because "x & Mask" is a truncation and distributes through subtraction). + APInt Width; + if (Pos == NegOp1) + Width = NegC->getAPIntValue(); + + // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. 
+ // Then the condition we want to prove becomes: + // + // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask + // + // which, again because "x & Mask" is a truncation, becomes: + // + // NegC & Mask == (EltSize - PosC) & Mask + // EltSize & Mask == (NegC + PosC) & Mask + else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) { + if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) + Width = PosC->getAPIntValue() + NegC->getAPIntValue(); + else + return false; + } else + return false; + + // Now we just need to check that EltSize & Mask == Width & Mask. + if (MaskLoBits) + // EltSize & Mask is 0 since Mask is EltSize - 1. + return Width.getLoBits(MaskLoBits) == 0; + return Width == EltSize; +} + +// A subroutine of MatchRotate used once we have found an OR of two opposite +// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces +// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the +// former being preferred if supported. InnerPos and InnerNeg are Pos and +// Neg with outer conversions stripped away. +SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, + SDValue Neg, SDValue InnerPos, + SDValue InnerNeg, unsigned PosOpcode, + unsigned NegOpcode, const SDLoc &DL) { + // fold (or (shl x, (*ext y)), + // (srl x, (*ext (sub 32, y)))) -> + // (rotl x, y) or (rotr x, (sub 32, y)) + // + // fold (or (shl x, (*ext (sub 32, y))), + // (srl x, (*ext y))) -> + // (rotr x, y) or (rotl x, (sub 32, y)) + EVT VT = Shifted.getValueType(); + if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) { + bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); + return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, + HasPos ? Pos : Neg); + } + + return SDValue(); +} + +// MatchRotate - Handle an 'or' of two operands. If this is one of the many +// idioms for rotate, and if the target supports rotation instructions, generate +// a rot[lr]. +SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { + // Must be a legal type. Expanded 'n promoted things won't work with rotates. + EVT VT = LHS.getValueType(); + if (!TLI.isTypeLegal(VT)) + return SDValue(); + + // The target must have at least one rotate flavor. + bool HasROTL = hasOperation(ISD::ROTL, VT); + bool HasROTR = hasOperation(ISD::ROTR, VT); + if (!HasROTL && !HasROTR) + return SDValue(); + + // Check for truncated rotate. + if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE && + LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) { + assert(LHS.getValueType() == RHS.getValueType()); + if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) { + return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot); + } + } + + // Match "(X shl/srl V1) & V2" where V2 may not be present. + SDValue LHSShift; // The shift. + SDValue LHSMask; // AND value if any. + matchRotateHalf(DAG, LHS, LHSShift, LHSMask); + + SDValue RHSShift; // The shift. + SDValue RHSMask; // AND value if any. + matchRotateHalf(DAG, RHS, RHSShift, RHSMask); + + // If neither side matched a rotate half, bail + if (!LHSShift && !RHSShift) + return SDValue(); + + // InstCombine may have combined a constant shl, srl, mul, or udiv with one + // side of the rotate, so try to handle that here. In all cases we need to + // pass the matched shift from the opposite side to compute the opcode and + // needed shift amount to extract. 
We still want to do this if both sides + // matched a rotate half because one half may be a potential overshift that + // can be broken down (ie if InstCombine merged two shl or srl ops into a + // single one). + + // Have LHS side of the rotate, try to extract the needed shift from the RHS. + if (LHSShift) + if (SDValue NewRHSShift = + extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL)) + RHSShift = NewRHSShift; + // Have RHS side of the rotate, try to extract the needed shift from the LHS. + if (RHSShift) + if (SDValue NewLHSShift = + extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL)) + LHSShift = NewLHSShift; + + // If a side is still missing, nothing else we can do. + if (!RHSShift || !LHSShift) + return SDValue(); + + // At this point we've matched or extracted a shift op on each side. + + if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) + return SDValue(); // Not shifting the same value. + + if (LHSShift.getOpcode() == RHSShift.getOpcode()) + return SDValue(); // Shifts must disagree. + + // Canonicalize shl to left side in a shl/srl pair. + if (RHSShift.getOpcode() == ISD::SHL) { + std::swap(LHS, RHS); + std::swap(LHSShift, RHSShift); + std::swap(LHSMask, RHSMask); + } + + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + SDValue LHSShiftArg = LHSShift.getOperand(0); + SDValue LHSShiftAmt = LHSShift.getOperand(1); + SDValue RHSShiftArg = RHSShift.getOperand(0); + SDValue RHSShiftAmt = RHSShift.getOperand(1); + + // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) + // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) + auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits; + }; + if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { + SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, + LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); + + // If there is an AND of either shifted operand, apply it to the result. + if (LHSMask.getNode() || RHSMask.getNode()) { + SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); + SDValue Mask = AllOnes; + + if (LHSMask.getNode()) { + SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt); + Mask = DAG.getNode(ISD::AND, DL, VT, Mask, + DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits)); + } + if (RHSMask.getNode()) { + SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt); + Mask = DAG.getNode(ISD::AND, DL, VT, Mask, + DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits)); + } + + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); + } + + return Rot; + } + + // If there is a mask here, and we have a variable shift, we can't be sure + // that we're masking out the right stuff. + if (LHSMask.getNode() || RHSMask.getNode()) + return SDValue(); + + // If the shift amount is sign/zext/any-extended just peel it off. 
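+  // e.g. (or (shl x, (zext i8 y)), (srl x, (zext i8 (sub 32, y)))) is then
+  // matched on the inner i8 amounts y and (sub 32, y).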
+  SDValue LExtOp0 = LHSShiftAmt;
+  SDValue RExtOp0 = RHSShiftAmt;
+  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+    LExtOp0 = LHSShiftAmt.getOperand(0);
+    RExtOp0 = RHSShiftAmt.getOperand(0);
+  }
+
+  SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
+                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
+  if (TryL)
+    return TryL;
+
+  SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
+                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
+  if (TryR)
+    return TryR;
+
+  return SDValue();
+}
+
+namespace {
+
+/// Represents the known origin of an individual byte in a load combine
+/// pattern. The value of the byte is either constant zero or comes from
+/// memory.
+struct ByteProvider {
+  // For constant zero providers Load is set to nullptr. For memory providers
+  // Load represents the node which loads the byte from memory.
+  // ByteOffset is the offset of the byte in the value produced by the load.
+  LoadSDNode *Load = nullptr;
+  unsigned ByteOffset = 0;
+
+  ByteProvider() = default;
+
+  static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
+    return ByteProvider(Load, ByteOffset);
+  }
+
+  static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
+
+  bool isConstantZero() const { return !Load; }
+  bool isMemory() const { return Load; }
+
+  bool operator==(const ByteProvider &Other) const {
+    return Other.Load == Load && Other.ByteOffset == ByteOffset;
+  }
+
+private:
+  ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
+      : Load(Load), ByteOffset(ByteOffset) {}
+};
+
+} // end anonymous namespace
+
+/// Recursively traverses the expression calculating the origin of the
+/// requested byte of the given value. Returns None if the provider can't be
+/// calculated.
+///
+/// For all values except the root of the expression, this verifies that the
+/// value has exactly one use; if not, it returns None. This way, if the
+/// origin of the byte is returned, it's guaranteed that the values which
+/// contribute to the byte are not used outside of this expression.
+///
+/// Because the parts of the expression are not allowed to have more than one
+/// use, this function iterates over trees, not DAGs. So it never visits the
+/// same node more than once.
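+///
+/// e.g. for (or (zero_extend (load i8 p)), (shl (zero_extend (load i8 q)), 8)),
+/// byte 0 is provided by the load of p and byte 1 by the load of q (p and q
+/// here are illustrative addresses).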
+static const Optional<ByteProvider>
+calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
+                      bool Root = false) {
+  // A typical i64-by-i8 load pattern requires recursion up to a depth of
+  // 8 calls.
+  if (Depth == 10)
+    return None;
+
+  if (!Root && !Op.hasOneUse())
+    return None;
+
+  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
+  unsigned BitWidth = Op.getValueSizeInBits();
+  if (BitWidth % 8 != 0)
+    return None;
+  unsigned ByteWidth = BitWidth / 8;
+  assert(Index < ByteWidth && "invalid index requested");
+  (void) ByteWidth;
+
+  switch (Op.getOpcode()) {
+  case ISD::OR: {
+    auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
+    if (!LHS)
+      return None;
+    auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
+    if (!RHS)
+      return None;
+
+    if (LHS->isConstantZero())
+      return RHS;
+    if (RHS->isConstantZero())
+      return LHS;
+    return None;
+  }
+  case ISD::SHL: {
+    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+    if (!ShiftOp)
+      return None;
+
+    uint64_t BitShift = ShiftOp->getZExtValue();
+    if (BitShift % 8 != 0)
+      return None;
+    uint64_t ByteShift = BitShift / 8;
+
+    return Index < ByteShift
+               ? ByteProvider::getConstantZero()
+               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
+                                       Depth + 1);
+  }
+  case ISD::ANY_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND: {
+    SDValue NarrowOp = Op->getOperand(0);
+    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
+    if (NarrowBitWidth % 8 != 0)
+      return None;
+    uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+    if (Index >= NarrowByteWidth)
+      return Op.getOpcode() == ISD::ZERO_EXTEND
+                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
+                 : None;
+    return calculateByteProvider(NarrowOp, Index, Depth + 1);
+  }
+  case ISD::BSWAP:
+    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
+                                 Depth + 1);
+  case ISD::LOAD: {
+    auto L = cast<LoadSDNode>(Op.getNode());
+    if (!L->isSimple() || L->isIndexed())
+      return None;
+
+    unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
+    if (NarrowBitWidth % 8 != 0)
+      return None;
+    uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+    if (Index >= NarrowByteWidth)
+      return L->getExtensionType() == ISD::ZEXTLOAD
+                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
+                 : None;
+    return ByteProvider::getMemory(L, Index);
+  }
+  }
+
+  return None;
+}
+
+static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
+  return i;
+}
+
+static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
+  return BW - i - 1;
+}
+
+// Check if the byte offsets we are looking at match either a big or little
+// endian value load. Return true for big endian, false for little endian,
+// and None if the match failed.
+static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
+                                  int64_t FirstOffset) {
+  // Endianness can be decided only when there are at least 2 bytes.
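+  // e.g. for a 4-byte value, offsets {0, 1, 2, 3} relative to FirstOffset
+  // match a little-endian load, while {3, 2, 1, 0} match a big-endian one.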
+  unsigned Width = ByteOffsets.size();
+  if (Width < 2)
+    return None;
+
+  bool BigEndian = true, LittleEndian = true;
+  for (unsigned i = 0; i < Width; i++) {
+    int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
+    LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
+    BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
+    if (!BigEndian && !LittleEndian)
+      return None;
+  }
+
+  assert((BigEndian != LittleEndian) && "It should be either big endian or "
+                                        "little endian");
+  return BigEndian;
+}
+
+static SDValue stripTruncAndExt(SDValue Value) {
+  switch (Value.getOpcode()) {
+  case ISD::TRUNCATE:
+  case ISD::ZERO_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::ANY_EXTEND:
+    return stripTruncAndExt(Value.getOperand(0));
+  }
+  return Value;
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the target
+/// supports it.
+///
+/// Assuming little endian target:
+///  i8 *p = ...
+///  i32 val = ...
+///  p[0] = (val >> 0) & 0xFF;
+///  p[1] = (val >> 8) & 0xFF;
+///  p[2] = (val >> 16) & 0xFF;
+///  p[3] = (val >> 24) & 0xFF;
+/// =>
+///  *((i32)p) = val;
+///
+///  i8 *p = ...
+///  i32 val = ...
+///  p[0] = (val >> 24) & 0xFF;
+///  p[1] = (val >> 16) & 0xFF;
+///  p[2] = (val >> 8) & 0xFF;
+///  p[3] = (val >> 0) & 0xFF;
+/// =>
+///  *((i32)p) = BSWAP(val);
+SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
+  // Collect all the stores in the chain.
+  SDValue Chain;
+  SmallVector<StoreSDNode *, 8> Stores;
+  for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
+    // TODO: Allow unordered atomics when wider type is legal (see D66309)
+    if (Store->getMemoryVT() != MVT::i8 ||
+        !Store->isSimple() || Store->isIndexed())
+      return SDValue();
+    Stores.push_back(Store);
+    Chain = Store->getChain();
+  }
+  // Handle the simple type only.
+  unsigned Width = Stores.size();
+  EVT VT = EVT::getIntegerVT(
+      *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
+  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+    return SDValue();
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
+    return SDValue();
+
+  // Check if all the bytes of the combined value we are looking at are stored
+  // to the same base address. Collect byte offsets from the Base address into
+  // ByteOffsets.
+  SDValue CombinedValue;
+  SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX);
+  int64_t FirstOffset = INT64_MAX;
+  StoreSDNode *FirstStore = nullptr;
+  Optional<BaseIndexOffset> Base;
+  for (auto Store : Stores) {
+    // All the stores store a different byte of the CombinedValue. A truncate
+    // is required to get that byte's value.
+    SDValue Trunc = Store->getValue();
+    if (Trunc.getOpcode() != ISD::TRUNCATE)
+      return SDValue();
+    // A shift operation is required to get the right byte offset, except for
+    // the first byte.
+    int64_t Offset = 0;
+    SDValue Value = Trunc.getOperand(0);
+    if (Value.getOpcode() == ISD::SRL ||
+        Value.getOpcode() == ISD::SRA) {
+      ConstantSDNode *ShiftOffset =
+          dyn_cast<ConstantSDNode>(Value.getOperand(1));
+      // Trying to match the following pattern. The shift offset must be
+      // a constant and a multiple of 8. It is the byte offset in "y".
+      //
+      // x = srl y, offset
+      // i8 z = trunc x
+      // store z, ...
+ if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
+ return SDValue();
+
+ Offset = ShiftOffset->getSExtValue() / 8;
+ Value = Value.getOperand(0);
+ }
+
+ // Stores must share the same combined value with different offsets.
+ if (!CombinedValue)
+ CombinedValue = Value;
+ else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
+ return SDValue();
+
+ // The trunc and all the extend operations should be stripped to get the
+ // real value being stored.
+ else if (CombinedValue.getValueType() != VT) {
+ if (Value.getValueType() == VT ||
+ Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
+ CombinedValue = Value;
+ // Give up if the combined value type is smaller than the store size.
+ if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
+ return SDValue();
+ }
+
+ // Stores must share the same base address.
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
+ int64_t ByteOffsetFromBase = 0;
+ if (!Base)
+ Base = Ptr;
+ else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
+ return SDValue();
+
+ // Remember the first byte store.
+ if (ByteOffsetFromBase < FirstOffset) {
+ FirstStore = Store;
+ FirstOffset = ByteOffsetFromBase;
+ }
+ // Map the offset in the store to the offset in the combined value, and
+ // return early if it has been set before.
+ if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
+ return SDValue();
+ ByteOffsets[Offset] = ByteOffsetFromBase;
+ }
+
+ assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+ assert(FirstStore && "First store must be set");
+
+ // Check if the bytes of the combined value we are looking at match with
+ // either a big or little endian value store.
+ Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
+ if (!IsBigEndian.hasValue())
+ return SDValue();
+
+ // The node we are looking at matches with the pattern, check if we can
+ // replace it with a single bswap if needed and a store.
+
+ // If the store needs a byte swap, check if the target supports it.
+ bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
+
+ // Before legalize we can introduce illegal bswaps which will be later
+ // converted to an explicit bswap sequence. This way we end up with a single
+ // store and byte shuffling instead of several stores and byte shuffling.
+ if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Check that a store of the wide type is both allowed and fast on the target
+ bool Fast = false;
+ bool Allowed =
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *FirstStore->getMemOperand(), &Fast);
+ if (!Allowed || !Fast)
+ return SDValue();
+
+ if (VT != CombinedValue.getValueType()) {
+ assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
+ "Got unexpected store value to combine");
+ CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
+ CombinedValue);
+ }
+
+ if (NeedsBswap)
+ CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
+
+ SDValue NewStore =
+ DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(),
+ FirstStore->getPointerInfo(), FirstStore->getAlignment());
+
+ // Rely on other DAG combine rules to remove the other individual stores.
+ DAG.ReplaceAllUsesWith(N, NewStore.getNode());
+ return NewStore;
+}
+
+/// Match a pattern where a wide type scalar value is loaded by several narrow
+/// loads and combined by shifts and ors.
Fold it into a single load or a load
+/// and a BSWAP if the target supports it.
+///
+/// Assuming little endian target:
+/// i8 *a = ...
+/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
+/// =>
+/// i32 val = *((i32)a)
+///
+/// i8 *a = ...
+/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
+/// =>
+/// i32 val = BSWAP(*((i32)a))
+///
+/// TODO: This rule matches complex patterns with OR node roots and doesn't
+/// interact well with the worklist mechanism. When a part of the pattern is
+/// updated (e.g. one of the loads) its direct users are put into the worklist,
+/// but the root node of the pattern which triggers the load combine is not
+/// necessarily a direct user of the changed node. For example, once the
+/// address of the t28 load is reassociated, the load combine won't be
+/// triggered:
+/// t25: i32 = add t4, Constant:i32<2>
+/// t26: i64 = sign_extend t25
+/// t27: i64 = add t2, t26
+/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
+/// t29: i32 = zero_extend t28
+/// t32: i32 = shl t29, Constant:i8<8>
+/// t33: i32 = or t23, t32
+/// As a possible fix, visitLoad can check if the load can be a part of a load
+/// combine pattern and add corresponding OR roots to the worklist.
+SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
+ assert(N->getOpcode() == ISD::OR &&
+ "Can only match load combining against OR nodes");
+
+ // Handles simple types only
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+ unsigned ByteWidth = VT.getSizeInBits() / 8;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // Before legalize we can introduce too wide illegal loads which will be later
+ // split into legal sized loads. This enables us to combine i64 load by i8
+ // patterns to a couple of i32 loads on 32 bit targets.
+ if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
+ return SDValue();
+
+ bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
+ auto MemoryByteOffset = [&] (ByteProvider P) {
+ assert(P.isMemory() && "Must be a memory byte provider");
+ unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
+ assert(LoadBitWidth % 8 == 0 &&
+ "can only analyze providers for individual bytes, not bits");
+ unsigned LoadByteWidth = LoadBitWidth / 8;
+ return IsBigEndianTarget
+ ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
+ : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
+ };
+
+ Optional<BaseIndexOffset> Base;
+ SDValue Chain;
+
+ SmallPtrSet<LoadSDNode *, 8> Loads;
+ Optional<ByteProvider> FirstByteProvider;
+ int64_t FirstOffset = INT64_MAX;
+
+ // Check if all the bytes of the OR we are looking at are loaded from the
+ // same base address. Collect byte offsets from the Base address into
+ // ByteOffsets.
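+ // E.g. for the little endian pattern in the header comment,
+ // i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24),
+ // byte i of the OR is provided by the i8 load at offset i, so ByteOffsets
+ // becomes {0, 1, 2, 3} and the isBigEndian check below identifies a little
+ // endian load.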
+ SmallVector<int64_t, 4> ByteOffsets(ByteWidth); + for (unsigned i = 0; i < ByteWidth; i++) { + auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true); + if (!P || !P->isMemory()) // All the bytes must be loaded from memory + return SDValue(); + + LoadSDNode *L = P->Load; + assert(L->hasNUsesOfValue(1, 0) && L->isSimple() && + !L->isIndexed() && + "Must be enforced by calculateByteProvider"); + assert(L->getOffset().isUndef() && "Unindexed load must have undef offset"); + + // All loads must share the same chain + SDValue LChain = L->getChain(); + if (!Chain) + Chain = LChain; + else if (Chain != LChain) + return SDValue(); + + // Loads must share the same base address + BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG); + int64_t ByteOffsetFromBase = 0; + if (!Base) + Base = Ptr; + else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase)) + return SDValue(); + + // Calculate the offset of the current byte from the base address + ByteOffsetFromBase += MemoryByteOffset(*P); + ByteOffsets[i] = ByteOffsetFromBase; + + // Remember the first byte load + if (ByteOffsetFromBase < FirstOffset) { + FirstByteProvider = P; + FirstOffset = ByteOffsetFromBase; + } + + Loads.insert(L); + } + assert(!Loads.empty() && "All the bytes of the value must be loaded from " + "memory, so there must be at least one load which produces the value"); + assert(Base && "Base address of the accessed memory location must be set"); + assert(FirstOffset != INT64_MAX && "First byte offset must be set"); + + // Check if the bytes of the OR we are looking at match with either big or + // little endian value load + Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset); + if (!IsBigEndian.hasValue()) + return SDValue(); + + assert(FirstByteProvider && "must be set"); + + // Ensure that the first byte is loaded from zero offset of the first load. + // So the combined value can be loaded from the first load address. + if (MemoryByteOffset(*FirstByteProvider) != 0) + return SDValue(); + LoadSDNode *FirstLoad = FirstByteProvider->Load; + + // The node we are looking at matches with the pattern, check if we can + // replace it with a single load and bswap if needed. + + // If the load needs byte swap check if the target supports it + bool NeedsBswap = IsBigEndianTarget != *IsBigEndian; + + // Before legalize we can introduce illegal bswaps which will be later + // converted to an explicit bswap sequence. This way we end up with a single + // load and byte shuffling instead of several loads and byte shuffling. + if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT)) + return SDValue(); + + // Check that a load of the wide type is both allowed and fast on the target + bool Fast = false; + bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), + VT, *FirstLoad->getMemOperand(), &Fast); + if (!Allowed || !Fast) + return SDValue(); + + SDValue NewLoad = + DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), FirstLoad->getAlignment()); + + // Transfer chain users from old loads to the new load. + for (LoadSDNode *L : Loads) + DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1)); + + return NeedsBswap ? 
DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad; +} + +// If the target has andn, bsl, or a similar bit-select instruction, +// we want to unfold masked merge, with canonical pattern of: +// | A | |B| +// ((x ^ y) & m) ^ y +// | D | +// Into: +// (x & m) | (y & ~m) +// If y is a constant, and the 'andn' does not work with immediates, +// we unfold into a different pattern: +// ~(~x & m) & (m | y) +// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at +// the very least that breaks andnpd / andnps patterns, and because those +// patterns are simplified in IR and shouldn't be created in the DAG +SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { + assert(N->getOpcode() == ISD::XOR); + + // Don't touch 'not' (i.e. where y = -1). + if (isAllOnesOrAllOnesSplat(N->getOperand(1))) + return SDValue(); + + EVT VT = N->getValueType(0); + + // There are 3 commutable operators in the pattern, + // so we have to deal with 8 possible variants of the basic pattern. + SDValue X, Y, M; + auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) { + if (And.getOpcode() != ISD::AND || !And.hasOneUse()) + return false; + SDValue Xor = And.getOperand(XorIdx); + if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse()) + return false; + SDValue Xor0 = Xor.getOperand(0); + SDValue Xor1 = Xor.getOperand(1); + // Don't touch 'not' (i.e. where y = -1). + if (isAllOnesOrAllOnesSplat(Xor1)) + return false; + if (Other == Xor0) + std::swap(Xor0, Xor1); + if (Other != Xor1) + return false; + X = Xor0; + Y = Xor1; + M = And.getOperand(XorIdx ? 0 : 1); + return true; + }; + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) && + !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0)) + return SDValue(); + + // Don't do anything if the mask is constant. This should not be reachable. + // InstCombine should have already unfolded this pattern, and DAGCombiner + // probably shouldn't produce it, too. + if (isa<ConstantSDNode>(M.getNode())) + return SDValue(); + + // We can transform if the target has AndNot + if (!TLI.hasAndNot(M)) + return SDValue(); + + SDLoc DL(N); + + // If Y is a constant, check that 'andn' works with immediates. + if (!TLI.hasAndNot(Y)) { + assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable."); + // If not, we need to do a bit more work to make sure andn is still used. + SDValue NotX = DAG.getNOT(DL, X, VT); + SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M); + SDValue NotLHS = DAG.getNOT(DL, LHS, VT); + SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y); + return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS); + } + + SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M); + SDValue NotM = DAG.getNOT(DL, M, VT); + SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM); + + return DAG.getNode(ISD::OR, DL, VT, LHS, RHS); +} + +SDValue DAGCombiner::visitXOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (xor x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N1; + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + } + + // fold (xor undef, undef) -> 0. This is a common idiom (misuse). 
+ SDLoc DL(N); + if (N0.isUndef() && N1.isUndef()) + return DAG.getConstant(0, DL, VT); + // fold (xor x, undef) -> undef + if (N0.isUndef()) + return N0; + if (N1.isUndef()) + return N1; + // fold (xor c1, c2) -> c1^c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C); + // canonicalize constant to RHS + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::XOR, DL, VT, N1, N0); + // fold (xor x, 0) -> x + if (isNullConstant(N1)) + return N0; + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + + // reassociate xor + if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags())) + return RXOR; + + // fold !(x cc y) -> (x !cc y) + unsigned N0Opcode = N0.getOpcode(); + SDValue LHS, RHS, CC; + if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { + ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + LHS.getValueType().isInteger()); + if (!LegalOperations || + TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) { + switch (N0Opcode) { + default: + llvm_unreachable("Unhandled SetCC Equivalent!"); + case ISD::SETCC: + return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC); + case ISD::SELECT_CC: + return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2), + N0.getOperand(3), NotCC); + } + } + } + + // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) + if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() && + isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ + SDValue V = N0.getOperand(0); + SDLoc DL0(N0); + V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V, + DAG.getConstant(1, DL0, V.getValueType())); + AddToWorklist(V.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V); + } + + // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc + if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() && + (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) { + SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1); + if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) { + unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND; + N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00 + N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01 + AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode()); + return DAG.getNode(NewOpcode, DL, VT, N00, N01); + } + } + // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants + if (isAllOnesConstant(N1) && N0.hasOneUse() && + (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) { + SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1); + if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) { + unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND; + N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00 + N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01 + AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode()); + return DAG.getNode(NewOpcode, DL, VT, N00, N01); + } + } + + // fold (not (neg x)) -> (add X, -1) + // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if + // Y is a constant or the subtract has a single use. 
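+ // Why the fold below holds: in two's complement ~v == -v - 1, so
+ // ~(0 - x) == -(0 - x) - 1 == x - 1, i.e. (add x, -1).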
+ if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB && + isNullConstant(N0.getOperand(0))) { + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), + DAG.getAllOnesConstant(DL, VT)); + } + + // fold (xor (and x, y), y) -> (and (not x), y) + if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) { + SDValue X = N0.getOperand(0); + SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); + AddToWorklist(NotX.getNode()); + return DAG.getNode(ISD::AND, DL, VT, NotX, N1); + } + + if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) { + ConstantSDNode *XorC = isConstOrConstSplat(N1); + ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1)); + unsigned BitWidth = VT.getScalarSizeInBits(); + if (XorC && ShiftC) { + // Don't crash on an oversized shift. We can not guarantee that a bogus + // shift has been simplified to undef. + uint64_t ShiftAmt = ShiftC->getLimitedValue(); + if (ShiftAmt < BitWidth) { + APInt Ones = APInt::getAllOnesValue(BitWidth); + Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt); + if (XorC->getAPIntValue() == Ones) { + // If the xor constant is a shifted -1, do a 'not' before the shift: + // xor (X << ShiftC), XorC --> (not X) << ShiftC + // xor (X >> ShiftC), XorC --> (not X) >> ShiftC + SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT); + return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1)); + } + } + } + } + + // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X) + if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { + SDValue A = N0Opcode == ISD::ADD ? N0 : N1; + SDValue S = N0Opcode == ISD::SRA ? N0 : N1; + if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) { + SDValue A0 = A.getOperand(0), A1 = A.getOperand(1); + SDValue S0 = S.getOperand(0); + if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) { + unsigned OpSizeInBits = VT.getScalarSizeInBits(); + if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1))) + if (C->getAPIntValue() == (OpSizeInBits - 1)) + return DAG.getNode(ISD::ABS, DL, VT, S0); + } + } + } + + // fold (xor x, x) -> 0 + if (N0 == N1) + return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); + + // fold (xor (shl 1, x), -1) -> (rotl ~1, x) + // Here is a concrete example of this equivalence: + // i16 x == 14 + // i16 shl == 1 << 14 == 16384 == 0b0100000000000000 + // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111 + // + // => + // + // i16 ~1 == 0b1111111111111110 + // i16 rol(~1, 14) == 0b1011111111111111 + // + // Some additional tips to help conceptualize this transform: + // - Try to see the operation as placing a single zero in a value of all ones. + // - There exists no value for x which would allow the result to contain zero. + // - Values of x larger than the bitwidth are undefined and do not require a + // consistent result. + // - Pushing the zero left requires shifting one bits in from the right. + // A rotate left of ~1 is a nice way of achieving the desired result. + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL && + isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) { + return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), + N0.getOperand(1)); + } + + // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) + if (N0Opcode == N1.getOpcode()) + if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) + return V; + + // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable + if (SDValue MM = unfoldMaskedMerge(N)) + return MM; + + // Simplify the expression using non-local knowledge. 
+ if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + return SDValue(); +} + +/// If we have a shift-by-constant of a bitwise logic op that itself has a +/// shift-by-constant operand with identical opcode, we may be able to convert +/// that into 2 independent shifts followed by the logic op. This is a +/// throughput improvement. +static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) { + // Match a one-use bitwise logic op. + SDValue LogicOp = Shift->getOperand(0); + if (!LogicOp.hasOneUse()) + return SDValue(); + + unsigned LogicOpcode = LogicOp.getOpcode(); + if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR && + LogicOpcode != ISD::XOR) + return SDValue(); + + // Find a matching one-use shift by constant. + unsigned ShiftOpcode = Shift->getOpcode(); + SDValue C1 = Shift->getOperand(1); + ConstantSDNode *C1Node = isConstOrConstSplat(C1); + assert(C1Node && "Expected a shift with constant operand"); + const APInt &C1Val = C1Node->getAPIntValue(); + auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp, + const APInt *&ShiftAmtVal) { + if (V.getOpcode() != ShiftOpcode || !V.hasOneUse()) + return false; + + ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1)); + if (!ShiftCNode) + return false; + + // Capture the shifted operand and shift amount value. + ShiftOp = V.getOperand(0); + ShiftAmtVal = &ShiftCNode->getAPIntValue(); + + // Shift amount types do not have to match their operand type, so check that + // the constants are the same width. + if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth()) + return false; + + // The fold is not valid if the sum of the shift values exceeds bitwidth. + if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits())) + return false; + + return true; + }; + + // Logic ops are commutative, so check each operand for a match. + SDValue X, Y; + const APInt *C0Val; + if (matchFirstShift(LogicOp.getOperand(0), X, C0Val)) + Y = LogicOp.getOperand(1); + else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val)) + Y = LogicOp.getOperand(0); + else + return SDValue(); + + // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1) + SDLoc DL(Shift); + EVT VT = Shift->getValueType(0); + EVT ShiftAmtVT = Shift->getOperand(1).getValueType(); + SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT); + SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC); + SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1); + return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2); +} + +/// Handle transforms common to the three shifts, when the shift amount is a +/// constant. +/// We are looking for: (shift being one of shl/sra/srl) +/// shift (binop X, C0), C1 +/// And want to transform into: +/// binop (shift X, C1), (shift C0, C1) +SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { + assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand"); + + // Do not turn a 'not' into a regular xor. + if (isBitwiseNot(N->getOperand(0))) + return SDValue(); + + // The inner binop must be one-use, since we want to replace it. + SDValue LHS = N->getOperand(0); + if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level)) + return SDValue(); + + // TODO: This is limited to early combining because it may reveal regressions + // otherwise. But since we just checked a target hook to see if this is + // desirable, that should have filtered out cases where this interferes + // with some other pattern matching. 
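+ // E.g. shl (xor (shl X, 2), Y), 3 --> xor (shl X, 5), (shl Y, 3). The shift
+ // distributes over the bitwise op, and the two new shifts are independent
+ // of each other, so they can execute in parallel.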
+ if (!LegalTypes)
+ if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
+ return R;
+
+ // We want to pull some binops through shifts, so that we have (and (shift))
+ // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
+ // thing happens with address calculations, so it's important to canonicalize
+ // it.
+ switch (LHS.getOpcode()) {
+ default:
+ return SDValue();
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::AND:
+ break;
+ case ISD::ADD:
+ if (N->getOpcode() != ISD::SHL)
+ return SDValue(); // only shl(add) not sr[al](add).
+ break;
+ }
+
+ // We require the RHS of the binop to be a constant and not opaque as well.
+ ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
+ if (!BinOpCst)
+ return SDValue();
+
+ // FIXME: disable this unless the input to the binop is a shift by a constant
+ // or is copy/select. Enable this in other cases when we figure out it's
+ // exactly profitable.
+ SDValue BinOpLHSVal = LHS.getOperand(0);
+ bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
+ BinOpLHSVal.getOpcode() == ISD::SRA ||
+ BinOpLHSVal.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
+ bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
+ BinOpLHSVal.getOpcode() == ISD::SELECT;
+
+ if (!IsShiftByConstant && !IsCopyOrSelect)
+ return SDValue();
+
+ if (IsCopyOrSelect && N->hasOneUse())
+ return SDValue();
+
+ // Fold the constants, shifting the binop RHS by the shift amount.
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
+ N->getOperand(1));
+ assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
+
+ SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
+ N->getOperand(1));
+ return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
+}
+
+SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
+ assert(N->getOpcode() == ISD::TRUNCATE);
+ assert(N->getOperand(0).getOpcode() == ISD::AND);
+
+ // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
+ EVT TruncVT = N->getValueType(0);
+ if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
+ TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
+ SDValue N01 = N->getOperand(0).getOperand(1);
+ if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
+ SDLoc DL(N);
+ SDValue N00 = N->getOperand(0).getOperand(0);
+ SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
+ SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
+ AddToWorklist(Trunc00.getNode());
+ AddToWorklist(Trunc01.getNode());
+ return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitRotate(SDNode *N) {
+ SDLoc dl(N);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ unsigned Bitsize = VT.getScalarSizeInBits();
+
+ // fold (rot x, 0) -> x
+ if (isNullOrNullSplat(N1))
+ return N0;
+
+ // fold (rot x, c) -> x iff (c % BitSize) == 0
+ if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
+ APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
+ if (DAG.MaskedValueIsZero(N1, ModuloMask))
+ return N0;
+ }
+
+ // fold (rot x, c) -> (rot x, c % BitSize)
+ // TODO - support non-uniform vector amounts.
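+ // E.g. for i32: rotl x, 37 --> rotl x, 5, since 37 urem 32 == 5.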
+ if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) { + if (Cst->getAPIntValue().uge(Bitsize)) { + uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize); + return DAG.getNode(N->getOpcode(), dl, VT, N0, + DAG.getConstant(RotAmt, dl, N1.getValueType())); + } + } + + // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). + if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND) { + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) + return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1); + } + + unsigned NextOp = N0.getOpcode(); + // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize) + if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) { + SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1); + SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)); + if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) { + EVT ShiftVT = C1->getValueType(0); + bool SameSide = (N->getOpcode() == NextOp); + unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB; + if (SDValue CombinedShift = + DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) { + SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT); + SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( + ISD::SREM, dl, ShiftVT, CombinedShift.getNode(), + BitsizeC.getNode()); + return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0), + CombinedShiftNorm); + } + } + } + return SDValue(); +} + +SDValue DAGCombiner::visitSHL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (SDValue V = DAG.simplifyShift(N0, N1)) + return V; + + EVT VT = N0.getValueType(); + EVT ShiftVT = N1.getValueType(); + unsigned OpSizeInBits = VT.getScalarSizeInBits(); + + // fold vector ops + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); + // If setcc produces all-one true value then: + // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV) + if (N1CV && N1CV->isConstant()) { + if (N0.getOpcode() == ISD::AND) { + SDValue N00 = N0->getOperand(0); + SDValue N01 = N0->getOperand(1); + BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01); + + if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && + TLI.getBooleanContents(N00.getOperand(0).getValueType()) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, + N01CV, N1CV)) + return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); + } + } + } + } + + ConstantSDNode *N1C = isConstOrConstSplat(N1); + + // fold (shl c1, c2) -> c1<<c2 + // TODO - support non-uniform vector shift amounts. + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + + // if (shl x, c) is known to be zero, return 0 + if (DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(OpSizeInBits))) + return DAG.getConstant(0, SDLoc(N), VT); + + // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). + if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND) { + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); + } + + // TODO - support non-uniform vector shift amounts. 
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) + if (N0.getOpcode() == ISD::SHL) { + auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return (c1 + c2).uge(OpSizeInBits); + }; + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) + return DAG.getConstant(0, SDLoc(N), VT); + + auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return (c1 + c2).ult(OpSizeInBits); + }; + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { + SDLoc DL(N); + SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum); + } + } + + // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2)) + // For this to be valid, the second form must not preserve any of the bits + // that are shifted out by the inner shift in the first form. This means + // the outer shift size must be >= the number of bits added by the ext. + // As a corollary, we don't care what kind of ext it is. + if ((N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::ANY_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND) && + N0.getOperand(0).getOpcode() == ISD::SHL) { + SDValue N0Op0 = N0.getOperand(0); + SDValue InnerShiftAmt = N0Op0.getOperand(1); + EVT InnerVT = N0Op0.getValueType(); + uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits(); + + auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return c2.uge(OpSizeInBits - InnerBitwidth) && + (c1 + c2).uge(OpSizeInBits); + }; + if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) + return DAG.getConstant(0, SDLoc(N), VT); + + auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return c2.uge(OpSizeInBits - InnerBitwidth) && + (c1 + c2).ult(OpSizeInBits); + }; + if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDLoc DL(N); + SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0)); + SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT); + Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1); + return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum); + } + } + + // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C)) + // Only fold this if the inner zext has no other uses to avoid increasing + // the total number of instructions. 
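+ // E.g. with x:i16 and C == 3:
+ // shl (zext (srl x, 3)):i32, 3 --> zext (shl (srl x, 3), 3)
+ // The srl cleared the top 3 bits, so the inner shl cannot lose bits and the
+ // shift can be performed in the narrow type.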
+ if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::SRL) {
+ SDValue N0Op0 = N0.getOperand(0);
+ SDValue InnerShiftAmt = N0Op0.getOperand(1);
+
+ auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2);
+ return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
+ };
+ if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
+ SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
+ NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
+ AddToWorklist(NewSHL.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+ }
+ }
+
+ // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
+ // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C1-C2)) if C1 > C2
+ // TODO - support non-uniform vector shift amounts.
+ if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
+ N0->getFlags().hasExact()) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t C1 = N0C1->getZExtValue();
+ uint64_t C2 = N1C->getZExtValue();
+ SDLoc DL(N);
+ if (C1 <= C2)
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(C2 - C1, DL, ShiftVT));
+ return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
+ DAG.getConstant(C1 - C2, DL, ShiftVT));
+ }
+ }
+
+ // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
+ // (and (srl x, (sub c1, c2)), MASK)
+ // Only fold this if the inner shift has no other uses -- if it does, folding
+ // this will increase the total number of instructions.
+ // TODO - drop hasOneUse requirement if c1 == c2?
+ // TODO - support non-uniform vector shift amounts.
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
+ TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
+ uint64_t c1 = N0C1->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
+ SDValue Shift;
+ if (c2 > c1) {
+ Mask <<= c2 - c1;
+ SDLoc DL(N);
+ Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c2 - c1, DL, ShiftVT));
+ } else {
+ Mask.lshrInPlace(c1 - c2);
+ SDLoc DL(N);
+ Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 - c2, DL, ShiftVT));
+ }
+ SDLoc DL(N0);
+ return DAG.getNode(ISD::AND, DL, VT, Shift,
+ DAG.getConstant(Mask, DL, VT));
+ }
+ }
+ }
+
+ // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+ if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true)) {
+ SDLoc DL(N);
+ SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
+ SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
+ }
+
+ // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+ // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
+ // Variant of version done on multiply, except mul by a power of 2 is turned
+ // into a shift.
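+ // E.g. (x + 5) << 2 --> (x << 2) + 20: shifting left by 2 multiplies both
+ // addends by 4 (modulo 2^bitwidth), exposing the shifted constant to
+ // further matching such as address calculations.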
+ if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
+ N0.getNode()->hasOneUse() &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
+ TLI.isDesirableToCommuteWithShift(N, Level)) {
+ SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
+ SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+ AddToWorklist(Shl0.getNode());
+ AddToWorklist(Shl1.getNode());
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
+ }
+
+ // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
+ if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
+ SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+ if (isConstantOrConstantVector(Shl))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
+ }
+
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSHL = visitShiftByConstant(N))
+ return NewSHL;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ // fold (sra 0, x) -> 0
+ // fold (sra -1, x) -> -1
+ if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
+ return N0;
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+ // fold (sra c1, c2) -> c1 >> c2 (arithmetic shift)
+ // TODO - support non-uniform vector shift amounts.
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 if the target
+ // supports sext_inreg.
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+ unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+ if (VT.isVector())
+ ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ ExtVT, VT.getVectorNumElements());
+ if ((!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
+ N0.getOperand(0), DAG.getValueType(ExtVT));
+ }
+
+ // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+ // clamp (add c1, c2) to max shift.
+ if (N0.getOpcode() == ISD::SRA) {
+ SDLoc DL(N);
+ EVT ShiftVT = N1.getValueType();
+ EVT ShiftSVT = ShiftVT.getScalarType();
+ SmallVector<SDValue, 16> ShiftValues;
+
+ auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ APInt Sum = c1 + c2;
+ unsigned ShiftSum =
+ Sum.uge(OpSizeInBits) ?
(OpSizeInBits - 1) : Sum.getZExtValue();
+ ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
+ return true;
+ };
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
+ SDValue ShiftValue;
+ if (VT.isVector())
+ ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
+ else
+ ShiftValue = ShiftValues[0];
+ return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
+ }
+ }
+
+ // fold (sra (shl X, m), (sub result_size, n))
+ // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
+ // result_size - n != m.
+ // If truncate is free for the target sext(shl) is likely to result in better
+ // code.
+ if (N0.getOpcode() == ISD::SHL && N1C) {
+ // Get the two constants of the shifts, N01C = m and N1C = n.
+ const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
+ if (N01C) {
+ LLVMContext &Ctx = *DAG.getContext();
+ // Determine what the truncate's result bitsize and type would be.
+ EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
+
+ if (VT.isVector())
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+
+ // Determine the residual right-shift amount.
+ int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+ // If the shift is not a no-op (in which case this should be just a sign
+ // extend already), the truncated-to type is legal, sign_extend is legal
+ // on that type, and the truncate to that type is both legal and free,
+ // perform the transform.
+ if ((ShiftAmt > 0) &&
+ TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+ TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
+ SDLoc DL(N);
+ SDValue Amt = DAG.getConstant(ShiftAmt, DL,
+ getShiftAmountTy(N0.getOperand(0).getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
+ N0.getOperand(0), Amt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
+ Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, DL,
+ N->getValueType(0), Trunc);
+ }
+ }
+ }
+
+ // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
+ // sra (add (shl X, N1C), AddC), N1C -->
+ // sext (add (trunc X to (width - N1C)), AddC')
+ if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
+ N0.getOperand(0).getOpcode() == ISD::SHL &&
+ N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
+ if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
+ SDValue Shl = N0.getOperand(0);
+ // Determine what the truncate's type would be and ask the target if that
+ // is a free operation.
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned ShiftAmt = N1C->getZExtValue();
+ EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+ if (VT.isVector())
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+
+ // TODO: The simple type check probably belongs in the default hook
+ // implementation and/or target-specific overrides (because
+ // non-simple types likely require masking when legalized), but that
+ // restriction may conflict with other transforms.
+ if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) {
+ SDLoc DL(N);
+ SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+ SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
+ trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+ return DAG.getSExtOrTrunc(Add, DL, VT);
+ }
+ }
+ }
+
+ // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
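+ // E.g. sra x:i32, (trunc (and y:i64, 31)) --> sra x, (and (trunc y), 31):
+ // bitwise AND commutes with truncation, so the mask can be applied in the
+ // narrow shift-amount type.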
+ if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND) { + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); + } + + // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2)) + // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2)) + // if c1 is equal to the number of bits the trunc removes + // TODO - support non-uniform vector shift amounts. + if (N0.getOpcode() == ISD::TRUNCATE && + (N0.getOperand(0).getOpcode() == ISD::SRL || + N0.getOperand(0).getOpcode() == ISD::SRA) && + N0.getOperand(0).hasOneUse() && + N0.getOperand(0).getOperand(1).hasOneUse() && N1C) { + SDValue N0Op0 = N0.getOperand(0); + if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) { + EVT LargeVT = N0Op0.getValueType(); + unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits; + if (LargeShift->getAPIntValue() == TruncBits) { + SDLoc DL(N); + SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL, + getShiftAmountTy(LargeVT)); + SDValue SRA = + DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt); + return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA); + } + } + } + + // Simplify, based on bits shifted out of the LHS. + // TODO - support non-uniform vector shift amounts. + if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // If the sign bit is known to be zero, switch this to a SRL. + if (DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); + + if (N1C && !N1C->isOpaque()) + if (SDValue NewSRA = visitShiftByConstant(N)) + return NewSRA; + + return SDValue(); +} + +SDValue DAGCombiner::visitSRL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (SDValue V = DAG.simplifyShift(N0, N1)) + return V; + + EVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getScalarSizeInBits(); + + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + ConstantSDNode *N1C = isConstOrConstSplat(N1); + + // fold (srl c1, c2) -> c1 >>u c2 + // TODO - support non-uniform vector shift amounts. 
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + + // if (srl x, c) is known to be zero, return 0 + if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(OpSizeInBits))) + return DAG.getConstant(0, SDLoc(N), VT); + + // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) + if (N0.getOpcode() == ISD::SRL) { + auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return (c1 + c2).uge(OpSizeInBits); + }; + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) + return DAG.getConstant(0, SDLoc(N), VT); + + auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return (c1 + c2).ult(OpSizeInBits); + }; + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { + SDLoc DL(N); + EVT ShiftVT = N1.getValueType(); + SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum); + } + } + + // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) + // TODO - support non-uniform vector shift amounts. + if (N1C && N0.getOpcode() == ISD::TRUNCATE && + N0.getOperand(0).getOpcode() == ISD::SRL) { + if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) { + uint64_t c1 = N001C->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0.getOperand(0).getValueType(); + EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); + // This is only valid if the OpSizeInBits + c1 = size of inner shift. + if (c1 + OpSizeInBits == InnerShiftSize) { + SDLoc DL(N0); + if (c1 + c2 >= InnerShiftSize) + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::TRUNCATE, DL, VT, + DAG.getNode(ISD::SRL, DL, InnerShiftVT, + N0.getOperand(0).getOperand(0), + DAG.getConstant(c1 + c2, DL, + ShiftCountVT))); + } + } + } + + // fold (srl (shl x, c), c) -> (and x, cst2) + // TODO - (srl (shl x, c1), c2). + if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && + isConstantOrConstantVector(N1, /* NoOpaques */ true)) { + SDLoc DL(N); + SDValue Mask = + DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1); + AddToWorklist(Mask.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask); + } + + // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) + // TODO - support non-uniform vector shift amounts. + if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { + // Shifting in all undef bits? 
+ EVT SmallVT = N0.getOperand(0).getValueType();
+ unsigned BitSize = SmallVT.getScalarSizeInBits();
+ if (N1C->getAPIntValue().uge(BitSize))
+ return DAG.getUNDEF(VT);
+
+ if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+ uint64_t ShiftAmt = N1C->getZExtValue();
+ SDLoc DL0(N0);
+ SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
+ N0.getOperand(0),
+ DAG.getConstant(ShiftAmt, DL0,
+ getShiftAmountTy(SmallVT)));
+ AddToWorklist(SmallShift.getNode());
+ APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
+ DAG.getConstant(Mask, DL, VT));
+ }
+ }
+
+ // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
+ // bit, which is unmodified by sra.
+ if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
+ if (N0.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
+ }
+
+ // fold (srl (ctlz x), log2(bitwidth)) -> x iff x has one bit set (the low
+ // bit).
+ if (N1C && N0.getOpcode() == ISD::CTLZ &&
+ N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
+ KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
+
+ // If any of the input bits are KnownOne, then the input couldn't be all
+ // zeros, thus the result of the srl will always be zero.
+ if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
+
+ // If all of the bits input to the ctlz node are known to be zero, then
+ // the result of the ctlz is "32" and the result of the shift is one.
+ APInt UnknownBits = ~Known.Zero;
+ if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
+
+ // Otherwise, check to see if there is exactly one bit input to the ctlz.
+ if (UnknownBits.isPowerOf2()) {
+ // Okay, we know that only the single bit specified by UnknownBits
+ // could be set on input to the CTLZ node. If this bit is set, the SRL
+ // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
+ // to an SRL/XOR pair, which is likely to simplify more.
+ unsigned ShAmt = UnknownBits.countTrailingZeros();
+ SDValue Op = N0.getOperand(0);
+
+ if (ShAmt) {
+ SDLoc DL(N0);
+ Op = DAG.getNode(ISD::SRL, DL, VT, Op,
+ DAG.getConstant(ShAmt, DL,
+ getShiftAmountTy(Op.getValueType())));
+ AddToWorklist(Op.getNode());
+ }
+
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT,
+ Op, DAG.getConstant(1, DL, VT));
+ }
+ }
+
+ // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
+ }
+
+ // fold operands of srl based on knowledge that the low bits are not
+ // demanded.
+ // TODO - support non-uniform vector shift amounts.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSRL = visitShiftByConstant(N))
+ return NewSRL;
+
+ // Attempt to convert a srl of a load into a narrower zero-extending load.
+ if (SDValue NarrowLoad = ReduceLoadWidth(N))
+ return NarrowLoad;
+
+ // Here is a common situation. We want to optimize:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // However, after the source operand of the SRL is optimized into an AND,
+ // the SRL itself may not be optimized further. Look for it and add the
+ // BRCOND into the worklist.
+ if (N->hasOneUse()) {
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorklist(Use);
+ else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+ // Also look past the truncate.
+ Use = *Use->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorklist(Use);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ bool IsFSHL = N->getOpcode() == ISD::FSHL;
+ unsigned BitWidth = VT.getScalarSizeInBits();
+
+ // fold (fshl N0, N1, 0) -> N0
+ // fold (fshr N0, N1, 0) -> N1
+ if (isPowerOf2_32(BitWidth))
+ if (DAG.MaskedValueIsZero(
+ N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
+ return IsFSHL ? N0 : N1;
+
+ auto IsUndefOrZero = [](SDValue V) {
+ return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
+ };
+
+ // TODO - support non-uniform vector shift amounts.
+ if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
+ EVT ShAmtTy = N2.getValueType();
+
+ // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
+ if (Cst->getAPIntValue().uge(BitWidth)) {
+ uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
+ DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
+ }
+
+ unsigned ShAmt = Cst->getZExtValue();
+ if (ShAmt == 0)
+ return IsFSHL ? N0 : N1;
+
+ // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
+ // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
+ // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
+ // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
+ if (IsUndefOrZero(N0))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
+ DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
+ SDLoc(N), ShAmtTy));
+ if (IsUndefOrZero(N1))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
+ DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
+ SDLoc(N), ShAmtTy));
+ }
+
+ // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
+ // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
+ // iff we know the shift amount is in range.
+ // TODO: when is it worth doing SUB(BW, N2) as well?
+ if (isPowerOf2_32(BitWidth)) {
+ APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
+ if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
+ if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
+ }
+
+ // fold (fshl N0, N0, N2) -> (rotl N0, N2)
+ // fold (fshr N0, N0, N2) -> (rotr N0, N2)
+ // TODO: Investigate flipping this rotate if only one is legal; if the funnel
+ // shift is legal as well, we might be better off avoiding the non-constant
+ // (BW - N2).
+ unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
+ if (N0 == N1 && hasOperation(RotOpc, VT))
+ return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
+
+ // Simplify, based on bits shifted out of N0/N1.
+ if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + return SDValue(); +} + +SDValue DAGCombiner::visitABS(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (abs c1) -> c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0); + // fold (abs (abs x)) -> (abs x) + if (N0.getOpcode() == ISD::ABS) + return N0; + // fold (abs x) -> x iff not-negative + if (DAG.SignBitIsZero(N0)) + return N0; + return SDValue(); +} + +SDValue DAGCombiner::visitBSWAP(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (bswap c1) -> c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0); + // fold (bswap (bswap x)) -> x + if (N0.getOpcode() == ISD::BSWAP) + return N0->getOperand(0); + return SDValue(); +} + +SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (bitreverse c1) -> c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0); + // fold (bitreverse (bitreverse x)) -> x + if (N0.getOpcode() == ISD::BITREVERSE) + return N0.getOperand(0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTLZ(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ctlz c1) -> c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); + + // If the value is known never to be zero, switch to the undef version. + if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) { + if (DAG.isKnownNeverZero(N0)) + return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ctlz_zero_undef c1) -> c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTTZ(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (cttz c1) -> c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); + + // If the value is known never to be zero, switch to the undef version. + if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) { + if (DAG.isKnownNeverZero(N0)) + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (cttz_zero_undef c1) -> c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTPOP(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ctpop c1) -> c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); + return SDValue(); +} + +// FIXME: This should be checking for no signed zeros on individual operands, as +// well as no nans. 
+static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
+ SDValue RHS,
+ const TargetLowering &TLI) {
+ const TargetOptions &Options = DAG.getTarget().Options;
+ EVT VT = LHS.getValueType();
+
+ return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
+ TLI.isProfitableToCombineMinNumMaxNum(VT) &&
+ DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
+}
+
+/// Generate Min/Max node.
+static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue RHS, SDValue True, SDValue False,
+ ISD::CondCode CC, const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
+ return SDValue();
+
+ EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ switch (CC) {
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE: {
+ // Since the operands are already known never to be NaN here, either fminnum
+ // or fminnum_ieee is OK. Try the IEEE version first, since fminnum is
+ // expanded in terms of it.
+ unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+ if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
+ return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
+
+ unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
+ if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS);
+ return SDValue();
+ }
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: {
+ unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
+ if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
+ return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
+
+ unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
+ if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS);
+ return SDValue();
+ }
+ default:
+ return SDValue();
+ }
+}
+
+/// If a (v)select has a condition value that is a sign-bit test, try to smear
+/// the condition operand sign-bit across the value width and use it as a mask.
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
+ SDValue Cond = N->getOperand(0);
+ SDValue C1 = N->getOperand(1);
+ SDValue C2 = N->getOperand(2);
+ assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
+ "Expected select-of-constants");
+
+ EVT VT = N->getValueType(0);
+ if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
+ VT != Cond.getOperand(0).getValueType())
+ return SDValue();
+
+ // The inverted-condition + commuted-select variants of these patterns are
+ // canonicalized to these forms in IR.
+ SDValue X = Cond.getOperand(0);
+ SDValue CondC = Cond.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
+ isAllOnesOrAllOnesSplat(C2)) {
+ // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
+ SDLoc DL(N);
+ SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+ return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
+ }
+ if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
+ // i8 X < 0 ?
C1 : 0 --> (X >>s 7) & C1 + SDLoc DL(N); + SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC); + return DAG.getNode(ISD::AND, DL, VT, Sra, C1); + } + return SDValue(); +} + +SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { + SDValue Cond = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + EVT CondVT = Cond.getValueType(); + SDLoc DL(N); + + if (!VT.isInteger()) + return SDValue(); + + auto *C1 = dyn_cast<ConstantSDNode>(N1); + auto *C2 = dyn_cast<ConstantSDNode>(N2); + if (!C1 || !C2) + return SDValue(); + + // Only do this before legalization to avoid conflicting with target-specific + // transforms in the other direction (create a select from a zext/sext). There + // is also a target-independent combine here in DAGCombiner in the other + // direction for (select Cond, -1, 0) when the condition is not i1. + if (CondVT == MVT::i1 && !LegalOperations) { + if (C1->isNullValue() && C2->isOne()) { + // select Cond, 0, 1 --> zext (!Cond) + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + if (VT != MVT::i1) + NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond); + return NotCond; + } + if (C1->isNullValue() && C2->isAllOnesValue()) { + // select Cond, 0, -1 --> sext (!Cond) + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + if (VT != MVT::i1) + NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond); + return NotCond; + } + if (C1->isOne() && C2->isNullValue()) { + // select Cond, 1, 0 --> zext (Cond) + if (VT != MVT::i1) + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); + return Cond; + } + if (C1->isAllOnesValue() && C2->isNullValue()) { + // select Cond, -1, 0 --> sext (Cond) + if (VT != MVT::i1) + Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); + return Cond; + } + + // Use a target hook because some targets may prefer to transform in the + // other direction. + if (TLI.convertSelectOfConstantsToMath(VT)) { + // For any constants that differ by 1, we can transform the select into an + // extend and add. + const APInt &C1Val = C1->getAPIntValue(); + const APInt &C2Val = C2->getAPIntValue(); + if (C1Val - 1 == C2Val) { + // select Cond, C1, C1-1 --> add (zext Cond), C1-1 + if (VT != MVT::i1) + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); + return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); + } + if (C1Val + 1 == C2Val) { + // select Cond, C1, C1+1 --> add (sext Cond), C1+1 + if (VT != MVT::i1) + Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); + return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); + } + + // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) + if (C1Val.isPowerOf2() && C2Val.isNullValue()) { + if (VT != MVT::i1) + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); + SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); + } + + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; + } + + return SDValue(); + } + + // fold (select Cond, 0, 1) -> (xor Cond, 1) + // We can't do this reliably if integer based booleans have different contents + // to floating point based booleans. This is because we can't tell whether we + // have an integer-based boolean or a floating-point-based boolean unless we + // can find the SETCC that produced it and inspect its operands. This is + // fairly easy if C is the SETCC node, but it can potentially be + // undiscoverable (or not reasonably discoverable). 
For example, it could be
+ // in another basic block or it could require searching a complicated
+ // expression.
+ if (CondVT.isInteger() &&
+ TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ C1->isNullValue() && C2->isOne()) {
+ SDValue NotCond =
+ DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
+ if (VT.bitsEq(CondVT))
+ return NotCond;
+ return DAG.getZExtOrTrunc(NotCond, DL, VT);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ EVT VT0 = N0.getValueType();
+ SDLoc DL(N);
+ SDNodeFlags Flags = N->getFlags();
+
+ if (SDValue V = DAG.simplifySelect(N0, N1, N2))
+ return V;
+
+ // fold (select X, X, Y) -> (or X, Y)
+ // fold (select X, 1, Y) -> (or X, Y)
+ if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
+ return DAG.getNode(ISD::OR, DL, VT, N0, N2);
+
+ if (SDValue V = foldSelectOfConstants(N))
+ return V;
+
+ // fold (select C, 0, X) -> (and (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
+ SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
+ AddToWorklist(NOTNode.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
+ }
+ // fold (select C, X, 1) -> (or (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
+ SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
+ AddToWorklist(NOTNode.getNode());
+ return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
+ }
+ // fold (select X, Y, X) -> (and X, Y)
+ // fold (select X, Y, 0) -> (and X, Y)
+ if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
+ return DAG.getNode(ISD::AND, DL, VT, N0, N1);
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ if (VT0 == MVT::i1) {
+ // The code in this block deals with the following 2 equivalences:
+ // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
+ // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
+ // The target can specify its preferred form with the
+ // shouldNormalizeToSelectSequence() callback. However, we always transform
+ // to the right-hand side if the inner select already exists in the DAG,
+ // and we always transform to the left side if we know that we can further
+ // optimize the combination of the conditions.
+ bool normalizeToSequence =
+ TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
+ // select (and Cond0, Cond1), X, Y
+ // -> select Cond0, (select Cond1, X, Y), Y
+ if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect =
+ DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
+ if (normalizeToSequence || !InnerSelect.use_empty())
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
+ InnerSelect, N2, Flags);
+ // Cleanup on failure.
+ if (InnerSelect.use_empty()) + recursivelyDeleteUnusedNodes(InnerSelect.getNode()); + } + // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) + if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(), + Cond1, N1, N2, Flags); + if (normalizeToSequence || !InnerSelect.use_empty()) + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1, + InnerSelect, Flags); + // Cleanup on failure. + if (InnerSelect.use_empty()) + recursivelyDeleteUnusedNodes(InnerSelect.getNode()); + } + + // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y + if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) { + SDValue N1_0 = N1->getOperand(0); + SDValue N1_1 = N1->getOperand(1); + SDValue N1_2 = N1->getOperand(2); + if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { + // Create the actual and node if we can generate good code for it. + if (!normalizeToSequence) { + SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, + N2, Flags); + } + // Otherwise see if we can optimize the "and" to a better pattern. + if (SDValue Combined = visitANDLike(N0, N1_0, N)) { + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1, + N2, Flags); + } + } + } + // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y + if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) { + SDValue N2_0 = N2->getOperand(0); + SDValue N2_1 = N2->getOperand(1); + SDValue N2_2 = N2->getOperand(2); + if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { + // Create the actual or node if we can generate good code for it. + if (!normalizeToSequence) { + SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, + N2_2, Flags); + } + // Otherwise see if we can optimize to a better pattern. + if (SDValue Combined = visitORLike(N0, N2_0, N)) + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1, + N2_2, Flags); + } + } + } + + // select (not Cond), N1, N2 -> select Cond, N2, N1 + if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) { + SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1); + SelectOp->setFlags(Flags); + return SelectOp; + } + + // Fold selects based on a setcc into other things, such as min/max/abs. + if (N0.getOpcode() == ISD::SETCC) { + SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + + // select (fcmp lt x, y), x, y -> fminnum x, y + // select (fcmp gt x, y), x, y -> fmaxnum x, y + // + // This is OK if we don't care what happens if either operand is a NaN. + if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI)) + if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, + CC, TLI, DAG)) + return FMinMax; + + // Use 'unsigned add with overflow' to optimize an unsigned saturating add. + // This is conservatively limited to pre-legal-operations to give targets + // a chance to reverse the transform if they want to do that. Also, it is + // unlikely that the pattern would be formed late, so it's probably not + // worth going through the other checks. 
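+ // e.g. for i8 with C = 42: ~C is 213, and (add X, 42) wraps exactly when
+ // X u> 213, so (select (setcc X, 213, ugt), -1, (add X, 42)) is a saturating
+ // add whose overflow test is uaddo's carry output.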
+ if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) && + CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) && + N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) { + auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1)); + auto *NotC = dyn_cast<ConstantSDNode>(Cond1); + if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) { + // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) --> + // uaddo Cond0, C; select uaddo.1, -1, uaddo.0 + // + // The IR equivalent of this transform would have this form: + // %a = add %x, C + // %c = icmp ugt %x, ~C + // %r = select %c, -1, %a + // => + // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C) + // %u0 = extractvalue %u, 0 + // %u1 = extractvalue %u, 1 + // %r = select %u1, -1, %u0 + SDVTList VTs = DAG.getVTList(VT, VT0); + SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1)); + return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0)); + } + } + + if (TLI.isOperationLegal(ISD::SELECT_CC, VT) || + (!LegalOperations && + TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) { + // Any flags available in a select/setcc fold will be on the setcc as they + // migrated from fcmp + Flags = N0.getNode()->getFlags(); + SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, + N2, N0.getOperand(2)); + SelectNode->setFlags(Flags); + return SelectNode; + } + + return SimplifySelect(DL, N0, N1, N2); + } + + return SDValue(); +} + +// This function assumes all the vselect's arguments are CONCAT_VECTOR +// nodes and that the condition is a BV of ConstantSDNodes (or undefs). +static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + SDValue Cond = N->getOperand(0); + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); + EVT VT = N->getValueType(0); + int NumElems = VT.getVectorNumElements(); + assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && + RHS.getOpcode() == ISD::CONCAT_VECTORS && + Cond.getOpcode() == ISD::BUILD_VECTOR); + + // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about + // binary ones here. + if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2) + return SDValue(); + + // We're sure we have an even number of elements due to the + // concat_vectors we have as arguments to vselect. + // Skip BV elements until we find one that's not an UNDEF + // After we find an UNDEF element, keep looping until we get to half the + // length of the BV and see if all the non-undef nodes are the same. + ConstantSDNode *BottomHalf = nullptr; + for (int i = 0; i < NumElems / 2; ++i) { + if (Cond->getOperand(i)->isUndef()) + continue; + + if (BottomHalf == nullptr) + BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i)); + else if (Cond->getOperand(i).getNode() != BottomHalf) + return SDValue(); + } + + // Do the same for the second half of the BuildVector + ConstantSDNode *TopHalf = nullptr; + for (int i = NumElems / 2; i < NumElems; ++i) { + if (Cond->getOperand(i)->isUndef()) + continue; + + if (TopHalf == nullptr) + TopHalf = cast<ConstantSDNode>(Cond.getOperand(i)); + else if (Cond->getOperand(i).getNode() != TopHalf) + return SDValue(); + } + + assert(TopHalf && BottomHalf && + "One half of the selector was all UNDEFs and the other was all the " + "same value. This should have been addressed before this function."); + return DAG.getNode( + ISD::CONCAT_VECTORS, DL, VT, + BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), + TopHalf->isNullValue() ? 
RHS->getOperand(1) : LHS->getOperand(1)); +} + +SDValue DAGCombiner::visitMSCATTER(SDNode *N) { + MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); + SDValue Mask = MSC->getMask(); + SDValue Chain = MSC->getChain(); + SDLoc DL(N); + + // Zap scatters with a zero mask. + if (ISD::isBuildVectorAllZeros(Mask.getNode())) + return Chain; + + return SDValue(); +} + +SDValue DAGCombiner::visitMSTORE(SDNode *N) { + MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); + SDValue Mask = MST->getMask(); + SDValue Chain = MST->getChain(); + SDLoc DL(N); + + // Zap masked stores with a zero mask. + if (ISD::isBuildVectorAllZeros(Mask.getNode())) + return Chain; + + return SDValue(); +} + +SDValue DAGCombiner::visitMGATHER(SDNode *N) { + MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N); + SDValue Mask = MGT->getMask(); + SDLoc DL(N); + + // Zap gathers with a zero mask. + if (ISD::isBuildVectorAllZeros(Mask.getNode())) + return CombineTo(N, MGT->getPassThru(), MGT->getChain()); + + return SDValue(); +} + +SDValue DAGCombiner::visitMLOAD(SDNode *N) { + MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N); + SDValue Mask = MLD->getMask(); + SDLoc DL(N); + + // Zap masked loads with a zero mask. + if (ISD::isBuildVectorAllZeros(Mask.getNode())) + return CombineTo(N, MLD->getPassThru(), MLD->getChain()); + + return SDValue(); +} + +/// A vector select of 2 constant vectors can be simplified to math/logic to +/// avoid a variable select instruction and possibly avoid constant loads. +SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { + SDValue Cond = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 || + !TLI.convertSelectOfConstantsToMath(VT) || + !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) || + !ISD::isBuildVectorOfConstantSDNodes(N2.getNode())) + return SDValue(); + + // Check if we can use the condition value to increment/decrement a single + // constant value. This simplifies a select to an add and removes a constant + // load/materialization from the general case. + bool AllAddOne = true; + bool AllSubOne = true; + unsigned Elts = VT.getVectorNumElements(); + for (unsigned i = 0; i != Elts; ++i) { + SDValue N1Elt = N1.getOperand(i); + SDValue N2Elt = N2.getOperand(i); + if (N1Elt.isUndef() || N2Elt.isUndef()) + continue; + + const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue(); + const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue(); + if (C1 != C2 + 1) + AllAddOne = false; + if (C1 != C2 - 1) + AllSubOne = false; + } + + // Further simplifications for the extra-special cases where the constants are + // all 0 or all -1 should be implemented as folds of these patterns. + SDLoc DL(N); + if (AllAddOne || AllSubOne) { + // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C + // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C + auto ExtendOpcode = AllAddOne ? 
ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; + SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond); + return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2); + } + + // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C) + APInt Pow2C; + if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() && + isNullOrNullSplat(N2)) { + SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT); + SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC); + } + + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; + + // The general case for select-of-constants: + // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2 + // ...but that only makes sense if a vselect is slower than 2 logic ops, so + // leave that to a machine-specific pass. + return SDValue(); +} + +SDValue DAGCombiner::visitVSELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + if (SDValue V = DAG.simplifySelect(N0, N1, N2)) + return V; + + // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1 + if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) + return DAG.getSelect(DL, VT, F, N2, N1); + + // Canonicalize integer abs. + // vselect (setg[te] X, 0), X, -X -> + // vselect (setgt X, -1), X, -X -> + // vselect (setl[te] X, 0), -X, X -> + // Y = sra (X, size(X)-1); xor (add (X, Y), Y) + if (N0.getOpcode() == ISD::SETCC) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + bool isAbs = false; + bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); + + if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) || + (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) && + N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1)) + isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode()); + else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) && + N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1)) + isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); + + if (isAbs) { + if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) + return DAG.getNode(ISD::ABS, DL, VT, LHS); + + SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS, + DAG.getConstant(VT.getScalarSizeInBits() - 1, + DL, getShiftAmountTy(VT))); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); + AddToWorklist(Shift.getNode()); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); + } + + // vselect x, y (fcmp lt x, y) -> fminnum x, y + // vselect x, y (fcmp gt x, y) -> fmaxnum x, y + // + // This is OK if we don't care about what happens if either operand is a + // NaN. + // + if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) { + if (SDValue FMinMax = + combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG)) + return FMinMax; + } + + // If this select has a condition (setcc) with narrower operands than the + // select, try to widen the compare to match the select width. + // TODO: This should be extended to handle any constant. + // TODO: This could be extended to handle non-loading patterns, but that + // requires thorough testing to avoid regressions. 
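+ // e.g. assuming v4i32 setcc and sign-extending v4i16 loads are legal:
+ // vselect (setcc (v4i16 (load X)), 0, setlt), N1:v4i32, N2:v4i32
+ // --> vselect (setcc (v4i32 (sextload X)), 0, setlt), N1, N2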
+ if (isNullOrNullSplat(RHS)) {
+ EVT NarrowVT = LHS.getValueType();
+ EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
+ EVT SetCCVT = getSetCCResultType(LHS.getValueType());
+ unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
+ unsigned WideWidth = WideVT.getScalarSizeInBits();
+ bool IsSigned = isSignedIntSetCC(CC);
+ auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
+ SetCCWidth != 1 && SetCCWidth < WideWidth &&
+ TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
+ // Both compare operands can be widened for free. The LHS can use an
+ // extended load, and the RHS is a constant:
+ // vselect (ext (setcc load(X), C)), N1, N2 -->
+ // vselect (setcc extload(X), C'), N1, N2
+ auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
+ SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
+ EVT WideSetCCVT = getSetCCResultType(WideVT);
+ SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
+ return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
+ }
+ }
+ }
+
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // Fold (vselect (build_vector all_ones), N1, N2) -> N1
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ return N1;
+ // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N2;
+
+ // ConvertSelectToConcatVector assumes both of the above checks for
+ // (vselect (build_vector all{ones,zeros}) ...) have already been made
+ // and addressed.
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N2.getOpcode() == ISD::CONCAT_VECTORS &&
+ ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+ if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
+ return CV;
+ }
+
+ if (SDValue V = foldVSelectOfConstants(N))
+ return V;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue N4 = N->getOperand(4);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+ // fold select_cc lhs, rhs, x, x, cc -> x
+ if (N2 == N3)
+ return N2;
+
+ // Determine if the condition we're dealing with is constant.
+ if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
+ CC, SDLoc(N), false)) {
+ AddToWorklist(SCC.getNode());
+
+ if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
+ if (!SCCC->isNullValue())
+ return N2; // cond always true -> true val
+ else
+ return N3; // cond always false -> false val
+ } else if (SCC->isUndef()) {
+ // When the condition is UNDEF, just return the first operand. This is
+ // consistent with DAG creation: no setcc node is created in this case.
+ return N2;
+ } else if (SCC.getOpcode() == ISD::SETCC) {
+ // Fold to a simpler select_cc.
+ SDValue SelectOp = DAG.getNode(
+ ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
+ SCC.getOperand(1), N2, N3, SCC.getOperand(2));
+ SelectOp->setFlags(SCC->getFlags());
+ return SelectOp;
+ }
+ }
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N2, N3))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold select_cc into other things, such as min/max/abs
+ return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
+}
+
+SDValue DAGCombiner::visitSETCC(SDNode *N) {
+ // setcc is very commonly used as an argument to brcond. This pattern
+ // also lends itself to numerous combines and, as a result, it is desirable
+ // to keep the argument to a brcond as a setcc for as long as possible.
+ bool PreferSetCC =
+ N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
+
+ SDValue Combined = SimplifySetCC(
+ N->getValueType(0), N->getOperand(0), N->getOperand(1),
+ cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
+
+ if (!Combined)
+ return SDValue();
+
+ // If we prefer to have a setcc, and we don't, we'll try our best to
+ // recreate one using rebuildSetCC.
+ if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
+ SDValue NewSetCC = rebuildSetCC(Combined);
+
+ // We don't have anything interesting to combine to.
+ if (NewSetCC.getNode() == N)
+ return SDValue();
+
+ if (NewSetCC)
+ return NewSetCC;
+ }
+
+ return Combined;
+}
+
+SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+
+ // If Carry is false, fold to a regular SETCC.
+ if (isNullConstant(Carry))
+ return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
+
+ return SDValue();
+}
+
+/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
+/// a build_vector of constants.
+/// This function is called by the DAGCombiner when visiting sext/zext/aext
+/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
+/// Vector extends are not folded if operations are legal; this is to
+/// avoid introducing illegal build_vector dag nodes.
+static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
+ SelectionDAG &DAG, bool LegalTypes) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
+ Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
+ && "Expected EXTEND dag node in input!");
+
+ // fold (sext c1) -> c1
+ // fold (zext c1) -> c1
+ // fold (aext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(Opcode, DL, VT, N0);
+
+ // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
+ // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
+ // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
+ if (N0->getOpcode() == ISD::SELECT) {
+ SDValue Op1 = N0->getOperand(1);
+ SDValue Op2 = N0->getOperand(2);
+ if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
+ (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
+ // For any_extend, choose sign extension of the constants to allow a
+ // possible further transform to sign_extend_inreg, i.e.:
+ //
+ // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
+ // t2: i64 = any_extend t1
+ // -->
+ // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
+ // -->
+ // t4: i64 = sign_extend_inreg t3
+ unsigned FoldOpc = Opcode;
+ if (FoldOpc == ISD::ANY_EXTEND)
+ FoldOpc = ISD::SIGN_EXTEND;
+ return DAG.getSelect(DL, VT, N0->getOperand(0),
+ DAG.getNode(FoldOpc, DL, VT, Op1),
+ DAG.getNode(FoldOpc, DL, VT, Op2));
+ }
+ }
+
+ // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
+ // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
+ // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
+ EVT SVT = VT.getScalarType();
+ if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
+ ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
+ return SDValue();
+
+ // We can fold this node into a build_vector.
+ unsigned VTBits = SVT.getSizeInBits();
+ unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
+ SmallVector<SDValue, 8> Elts;
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // For zero-extensions, UNDEF elements are still guaranteed to have the
+ // upper bits set to zero.
+ bool IsZext =
+ Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Op = N0.getOperand(i);
+ if (Op.isUndef()) {
+ Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
+ continue;
+ }
+
+ SDLoc DL(Op);
+ // Get the constant value and, if needed, truncate it to the size of the
+ // type. Nodes like build_vector might have constants wider than the
+ // scalar type.
+ APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
+ if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
+ Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
+ else
+ Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
+ }
+
+ return DAG.getBuildVector(VT, DL, Elts);
+}
+
+// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable the
+// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
+// transformation. Returns true if the extensions are possible and the
+// above-mentioned transformation is profitable.
+static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
+ unsigned ExtOpc,
+ SmallVectorImpl<SDNode *> &ExtendNodes,
+ const TargetLowering &TLI) {
+ bool HasCopyToRegUses = false;
+ bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
+ for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+ UE = N0.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == N)
+ continue;
+ if (UI.getUse().getResNo() != N0.getResNo())
+ continue;
+ // FIXME: Only extend SETCC N, N and SETCC N, c for now.
+ if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+ if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
+ // Sign bits will be lost after a zext.
+ return false;
+ bool Add = false;
+ for (unsigned i = 0; i != 2; ++i) {
+ SDValue UseOp = User->getOperand(i);
+ if (UseOp == N0)
+ continue;
+ if (!isa<ConstantSDNode>(UseOp))
+ return false;
+ Add = true;
+ }
+ if (Add)
+ ExtendNodes.push_back(User);
+ continue;
+ }
+ // If truncates aren't free and there are users we can't
+ // extend, it isn't worthwhile.
+ if (!isTruncFree)
+ return false;
+ // Remember if this value is live-out.
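+ // (A CopyToReg user means the value is live out of this block; if both the
+ // extended and unextended values end up live out, extending the load may
+ // not be worthwhile; see the BothLiveOut check below.)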
+ if (User->getOpcode() == ISD::CopyToReg) + HasCopyToRegUses = true; + } + + if (HasCopyToRegUses) { + bool BothLiveOut = false; + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) { + SDUse &Use = UI.getUse(); + if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { + BothLiveOut = true; + break; + } + } + if (BothLiveOut) + // Both unextended and extended values are live out. There had better be + // a good reason for the transformation. + return ExtendNodes.size(); + } + return true; +} + +void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, + SDValue OrigLoad, SDValue ExtLoad, + ISD::NodeType ExtType) { + // Extend SetCC uses if necessary. + SDLoc DL(ExtLoad); + for (SDNode *SetCC : SetCCs) { + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == OrigLoad) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops)); + } +} + +// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?). +SDValue DAGCombiner::CombineExtLoad(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT DstVT = N->getValueType(0); + EVT SrcVT = N0.getValueType(); + + assert((N->getOpcode() == ISD::SIGN_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND) && + "Unexpected node type (not an extend)!"); + + // fold (sext (load x)) to multiple smaller sextloads; same for zext. + // For example, on a target with legal v4i32, but illegal v8i32, turn: + // (v8i32 (sext (v8i16 (load x)))) + // into: + // (v8i32 (concat_vectors (v4i32 (sextload x)), + // (v4i32 (sextload (x + 16))))) + // Where uses of the original load, i.e.: + // (v8i16 (load x)) + // are replaced with: + // (v8i16 (truncate + // (v8i32 (concat_vectors (v4i32 (sextload x)), + // (v4i32 (sextload (x + 16))))))) + // + // This combine is only applicable to illegal, but splittable, vectors. + // All legal types, and illegal non-vector types, are handled elsewhere. + // This combine is controlled by TargetLowering::isVectorLoadExtDesirable. + // + if (N0->getOpcode() != ISD::LOAD) + return SDValue(); + + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + + if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || + !N0.hasOneUse() || !LN0->isSimple() || + !DstVT.isVector() || !DstVT.isPow2VectorType() || + !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + return SDValue(); + + SmallVector<SDNode *, 4> SetCCs; + if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI)) + return SDValue(); + + ISD::LoadExtType ExtType = + N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + + // Try to split the vector types to get down to legal types. 
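+ // Each iteration halves both types, e.g. v8i32/v8i16 -> v4i32/v4i16 ->
+ // v2i32/v2i16, stopping as soon as the extending load becomes legal or
+ // custom.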
+ EVT SplitSrcVT = SrcVT; + EVT SplitDstVT = DstVT; + while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) && + SplitSrcVT.getVectorNumElements() > 1) { + SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first; + SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first; + } + + if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) + return SDValue(); + + SDLoc DL(N); + const unsigned NumSplits = + DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); + const unsigned Stride = SplitSrcVT.getStoreSize(); + SmallVector<SDValue, 4> Loads; + SmallVector<SDValue, 4> Chains; + + SDValue BasePtr = LN0->getBasePtr(); + for (unsigned Idx = 0; Idx < NumSplits; Idx++) { + const unsigned Offset = Idx * Stride; + const unsigned Align = MinAlign(LN0->getAlignment(), Offset); + + SDValue SplitLoad = DAG.getExtLoad( + ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr, + LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); + + BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Stride, DL, BasePtr.getValueType())); + + Loads.push_back(SplitLoad.getValue(0)); + Chains.push_back(SplitLoad.getValue(1)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads); + + // Simplify TF. + AddToWorklist(NewChain.getNode()); + + CombineTo(N, NewValue); + + // Replace uses of the original load (before extension) + // with a truncate of the concatenated sextloaded vectors. + SDValue Trunc = + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue); + ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode()); + CombineTo(N0.getNode(), Trunc, NewChain); + return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + +// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) -> +// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst)) +SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) { + assert(N->getOpcode() == ISD::ZERO_EXTEND); + EVT VT = N->getValueType(0); + EVT OrigVT = N->getOperand(0).getValueType(); + if (TLI.isZExtFree(OrigVT, VT)) + return SDValue(); + + // and/or/xor + SDValue N0 = N->getOperand(0); + if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) || + N0.getOperand(1).getOpcode() != ISD::Constant || + (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT))) + return SDValue(); + + // shl/shr + SDValue N1 = N0->getOperand(0); + if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) || + N1.getOperand(1).getOpcode() != ISD::Constant || + (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT))) + return SDValue(); + + // load + if (!isa<LoadSDNode>(N1.getOperand(0))) + return SDValue(); + LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0)); + EVT MemVT = Load->getMemoryVT(); + if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) || + Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed()) + return SDValue(); + + + // If the shift op is SHL, the logic op must be AND, otherwise the result + // will be wrong. + if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND) + return SDValue(); + + if (!N0.hasOneUse() || !N1.hasOneUse()) + return SDValue(); + + SmallVector<SDNode*, 4> SetCCs; + if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0), + ISD::ZERO_EXTEND, SetCCs, TLI)) + return SDValue(); + + // Actually do the transformation. 
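+ // e.g. (i64 (zext (and (srl (i32 (load x)), 8), 255)))
+ // --> (and (srl (i64 (zextload x)), 8), 255)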
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT, + Load->getChain(), Load->getBasePtr(), + Load->getMemoryVT(), Load->getMemOperand()); + + SDLoc DL1(N1); + SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad, + N1.getOperand(1)); + + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.zext(VT.getSizeInBits()); + SDLoc DL0(N0); + SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift, + DAG.getConstant(Mask, DL0, VT)); + + ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND); + CombineTo(N, And); + if (SDValue(Load, 0).hasOneUse()) { + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1)); + } else { + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load), + Load->getValueType(0), ExtLoad); + CombineTo(Load, Trunc, ExtLoad.getValue(1)); + } + + // N0 is dead at this point. + recursivelyDeleteUnusedNodes(N0.getNode()); + + return SDValue(N,0); // Return N so it doesn't get rechecked! +} + +/// If we're narrowing or widening the result of a vector select and the final +/// size is the same size as a setcc (compare) feeding the select, then try to +/// apply the cast operation to the select's operands because matching vector +/// sizes for a select condition and other operands should be more efficient. +SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) { + unsigned CastOpcode = Cast->getOpcode(); + assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND || + CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND || + CastOpcode == ISD::FP_ROUND) && + "Unexpected opcode for vector select narrowing/widening"); + + // We only do this transform before legal ops because the pattern may be + // obfuscated by target-specific operations after legalization. Do not create + // an illegal select op, however, because that may be difficult to lower. + EVT VT = Cast->getValueType(0); + if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)) + return SDValue(); + + SDValue VSel = Cast->getOperand(0); + if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() || + VSel.getOperand(0).getOpcode() != ISD::SETCC) + return SDValue(); + + // Does the setcc have the same vector size as the casted select? + SDValue SetCC = VSel.getOperand(0); + EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType()); + if (SetCCVT.getSizeInBits() != VT.getSizeInBits()) + return SDValue(); + + // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B) + SDValue A = VSel.getOperand(1); + SDValue B = VSel.getOperand(2); + SDValue CastA, CastB; + SDLoc DL(Cast); + if (CastOpcode == ISD::FP_ROUND) { + // FP_ROUND (fptrunc) has an extra flag operand to pass along. + CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1)); + CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1)); + } else { + CastA = DAG.getNode(CastOpcode, DL, VT, A); + CastB = DAG.getNode(CastOpcode, DL, VT, B); + } + return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB); +} + +// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) +// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) +static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, + const TargetLowering &TLI, EVT VT, + bool LegalOperations, SDNode *N, + SDValue N0, ISD::LoadExtType ExtLoadType) { + SDNode *N0Node = N0.getNode(); + bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? 
ISD::isSEXTLoad(N0Node) + : ISD::isZEXTLoad(N0Node); + if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) || + !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse()) + return SDValue(); + + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + if ((LegalOperations || !LN0->isSimple() || + VT.isVector()) && + !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) + return SDValue(); + + SDValue ExtLoad = + DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(), + LN0->getBasePtr(), MemVT, LN0->getMemOperand()); + Combiner.CombineTo(N, ExtLoad); + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + if (LN0->use_empty()) + Combiner.recursivelyDeleteUnusedNodes(LN0); + return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + +// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x))) +// Only generate vector extloads when 1) they're legal, and 2) they are +// deemed desirable by the target. +static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, + const TargetLowering &TLI, EVT VT, + bool LegalOperations, SDNode *N, SDValue N0, + ISD::LoadExtType ExtLoadType, + ISD::NodeType ExtOpc) { + if (!ISD::isNON_EXTLoad(N0.getNode()) || + !ISD::isUNINDEXEDLoad(N0.getNode()) || + ((LegalOperations || VT.isVector() || + !cast<LoadSDNode>(N0)->isSimple()) && + !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))) + return {}; + + bool DoXform = true; + SmallVector<SDNode *, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI); + if (VT.isVector()) + DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); + if (!DoXform) + return {}; + + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(), + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); + Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc); + // If the load value is used only by N, replace it via CombineTo N. + bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); + Combiner.CombineTo(N, ExtLoad); + if (NoReplaceTrunc) { + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + Combiner.recursivelyDeleteUnusedNodes(LN0); + } else { + SDValue Trunc = + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); + Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+} + +static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, + const TargetLowering &TLI, EVT VT, + SDNode *N, SDValue N0, + ISD::LoadExtType ExtLoadType, + ISD::NodeType ExtOpc) { + if (!N0.hasOneUse()) + return SDValue(); + + MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0); + if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD) + return SDValue(); + + if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0))) + return SDValue(); + + if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + return SDValue(); + + SDLoc dl(Ld); + SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru()); + SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(), + Ld->getBasePtr(), Ld->getMask(), + PassThru, Ld->getMemoryVT(), + Ld->getMemOperand(), ExtLoadType, + Ld->isExpandingLoad()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1)); + return NewLoad; +} + +static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, + bool LegalOperations) { + assert((N->getOpcode() == ISD::SIGN_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext"); + + SDValue SetCC = N->getOperand(0); + if (LegalOperations || SetCC.getOpcode() != ISD::SETCC || + !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1) + return SDValue(); + + SDValue X = SetCC.getOperand(0); + SDValue Ones = SetCC.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get(); + EVT VT = N->getValueType(0); + EVT XVT = X.getValueType(); + // setge X, C is canonicalized to setgt, so we do not need to match that + // pattern. The setlt sibling is folded in SimplifySelectCC() because it does + // not require the 'not' op. + if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) { + // Invert and smear/shift the sign bit: + // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1) + // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1) + SDLoc DL(N); + SDValue NotX = DAG.getNOT(DL, X, VT); + SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT); + auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL; + return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount); + } + return SDValue(); +} + +SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) + return Res; + + // fold (sext (sext x)) -> (sext x) + // fold (sext (aext x)) -> (sext x) + if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0)); + + if (N0.getOpcode() == ISD::TRUNCATE) { + // fold (sext (truncate (load x))) -> (sext (smaller load x)) + // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { + SDNode *oye = N0.getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { + CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorklist(oye); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + // See if the value being truncated is already sign extended. If so, just + // eliminate the trunc/sext pair. 
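+ // (Three cases below: Op is as wide as, narrower than, or wider than the
+ // destination type.)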
+ SDValue Op = N0.getOperand(0); + unsigned OpBits = Op.getScalarValueSizeInBits(); + unsigned MidBits = N0.getScalarValueSizeInBits(); + unsigned DestBits = VT.getScalarSizeInBits(); + unsigned NumSignBits = DAG.ComputeNumSignBits(Op); + + if (OpBits == DestBits) { + // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign + // bits, it is already ready. + if (NumSignBits > DestBits-MidBits) + return Op; + } else if (OpBits < DestBits) { + // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign + // bits, just sext from i32. + if (NumSignBits > OpBits-MidBits) + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op); + } else { + // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign + // bits, just truncate to i32. + if (NumSignBits > OpBits-MidBits) + return DAG.getNode(ISD::TRUNCATE, DL, VT, Op); + } + + // fold (sext (truncate x)) -> (sextinreg x). + if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, + N0.getValueType())) { + if (OpBits < DestBits) + Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op); + else if (OpBits > DestBits) + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op, + DAG.getValueType(N0.getValueType())); + } + } + + // Try to simplify (sext (load x)). + if (SDValue foldedExt = + tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0, + ISD::SEXTLOAD, ISD::SIGN_EXTEND)) + return foldedExt; + + if (SDValue foldedExt = + tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD, + ISD::SIGN_EXTEND)) + return foldedExt; + + // fold (sext (load x)) to multiple smaller sextloads. + // Only on illegal but splittable vectors. + if (SDValue ExtLoad = CombineExtLoad(N)) + return ExtLoad; + + // Try to simplify (sext (sextload x)). + if (SDValue foldedExt = tryToFoldExtOfExtload( + DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD)) + return foldedExt; + + // fold (sext (and/or/xor (load x), cst)) -> + // (and/or/xor (sextload x), (sext cst)) + if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::Constant && + (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { + LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0)); + EVT MemVT = LN00->getMemoryVT(); + if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) && + LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) { + SmallVector<SDNode*, 4> SetCCs; + bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0), + ISD::SIGN_EXTEND, SetCCs, TLI); + if (DoXform) { + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT, + LN00->getChain(), LN00->getBasePtr(), + LN00->getMemoryVT(), + LN00->getMemOperand()); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.sext(VT.getSizeInBits()); + SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, + ExtLoad, DAG.getConstant(Mask, DL, VT)); + ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND); + bool NoReplaceTruncAnd = !N0.hasOneUse(); + bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse(); + CombineTo(N, And); + // If N0 has multiple uses, change other uses as well. 
+ if (NoReplaceTruncAnd) { + SDValue TruncAnd = + DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And); + CombineTo(N0.getNode(), TruncAnd); + } + if (NoReplaceTrunc) { + DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1)); + } else { + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00), + LN00->getValueType(0), ExtLoad); + CombineTo(LN00, Trunc, ExtLoad.getValue(1)); + } + return SDValue(N,0); // Return N so it doesn't get rechecked! + } + } + } + + if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations)) + return V; + + if (N0.getOpcode() == ISD::SETCC) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + EVT N00VT = N0.getOperand(0).getValueType(); + + // sext(setcc) -> sext_in_reg(vsetcc) for vectors. + // Only do this before legalize for now. + if (VT.isVector() && !LegalOperations && + TLI.getBooleanContents(N00VT) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { + // On some architectures (such as SSE/NEON/etc) the SETCC result type is + // of the same size as the compared operands. Only optimize sext(setcc()) + // if this is the case. + EVT SVT = getSetCCResultType(N00VT); + + // If we already have the desired type, don't change it. + if (SVT != N0.getValueType()) { + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. + if (VT.getSizeInBits() == SVT.getSizeInBits()) + return DAG.getSetCC(DL, VT, N00, N01, CC); + + // If the desired elements are smaller or larger than the source + // elements, we can use a matching integer vector type and then + // truncate/sign extend. + EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger(); + if (SVT == MatchingVecType) { + SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC); + return DAG.getSExtOrTrunc(VsetCC, DL, VT); + } + } + } + + // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0) + // Here, T can be 1 or -1, depending on the type of the setcc and + // getBooleanContents(). + unsigned SetCCWidth = N0.getScalarValueSizeInBits(); + + // To determine the "true" side of the select, we need to know the high bit + // of the value returned by the setcc if it evaluates to true. + // If the type of the setcc is i1, then the true case of the select is just + // sext(i1 1), that is, -1. + // If the type of the setcc is larger (say, i8) then the value of the high + // bit depends on getBooleanContents(), so ask TLI for a real "true" value + // of the appropriate width. + SDValue ExtTrueVal = (SetCCWidth == 1) + ? DAG.getAllOnesConstant(DL, VT) + : DAG.getBoolConstant(true, DL, VT, N00VT); + SDValue Zero = DAG.getConstant(0, DL, VT); + if (SDValue SCC = + SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) + return SCC; + + if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) { + EVT SetCCVT = getSetCCResultType(N00VT); + // Don't do this transform for i1 because there's a select transform + // that would reverse it. + // TODO: We should not do this transform at all without a target hook + // because a sext is likely cheaper than a select? 
+ if (SetCCVT.getScalarSizeInBits() != 1 && + (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) { + SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC); + return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero); + } + } + } + + // fold (sext x) -> (zext x) if the sign bit is known zero. + if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && + DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0); + + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + + // Eliminate this sign extend by doing a negation in the destination type: + // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64) + if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && + isNullOrNullSplat(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND && + TLI.isOperationLegalOrCustom(ISD::SUB, VT)) { + SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT); + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext); + } + // Eliminate this sign extend by doing a decrement in the destination type: + // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1) + if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && + isAllOnesOrAllOnesSplat(N0.getOperand(1)) && + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && + TLI.isOperationLegalOrCustom(ISD::ADD, VT)) { + SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); + } + + return SDValue(); +} + +// isTruncateOf - If N is a truncate of some other value, return true, record +// the value being truncated in Op and which of Op's bits are zero/one in Known. +// This function computes KnownBits to avoid a duplicated call to +// computeKnownBits in the caller. +static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, + KnownBits &Known) { + if (N->getOpcode() == ISD::TRUNCATE) { + Op = N->getOperand(0); + Known = DAG.computeKnownBits(Op); + return true; + } + + if (N.getOpcode() != ISD::SETCC || + N.getValueType().getScalarType() != MVT::i1 || + cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE) + return false; + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + assert(Op0.getValueType() == Op1.getValueType()); + + if (isNullOrNullSplat(Op0)) + Op = Op1; + else if (isNullOrNullSplat(Op1)) + Op = Op0; + else + return false; + + Known = DAG.computeKnownBits(Op); + + return (Known.Zero | 1).isAllOnesValue(); +} + +SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) + return Res; + + // fold (zext (zext x)) -> (zext x) + // fold (zext (aext x)) -> (zext x) + if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, + N0.getOperand(0)); + + // fold (zext (truncate x)) -> (zext x) or + // (zext (truncate x)) -> (truncate x) + // This is valid when the truncated bits of x are already zero. + SDValue Op; + KnownBits Known; + if (isTruncateOf(DAG, N0, Op, Known)) { + APInt TruncatedBits = + (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ? 
+ APInt(Op.getScalarValueSizeInBits(), 0) : + APInt::getBitsSet(Op.getScalarValueSizeInBits(), + N0.getScalarValueSizeInBits(), + std::min(Op.getScalarValueSizeInBits(), + VT.getScalarSizeInBits())); + if (TruncatedBits.isSubsetOf(Known.Zero)) + return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); + } + + // fold (zext (truncate x)) -> (and x, mask) + if (N0.getOpcode() == ISD::TRUNCATE) { + // fold (zext (truncate (load x))) -> (zext (smaller load x)) + // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { + SDNode *oye = N0.getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { + CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorklist(oye); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + EVT SrcVT = N0.getOperand(0).getValueType(); + EVT MinVT = N0.getValueType(); + + // Try to mask before the extension to avoid having to generate a larger mask, + // possibly over several sub-vectors. + if (SrcVT.bitsLT(VT) && VT.isVector()) { + if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) && + TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) { + SDValue Op = N0.getOperand(0); + Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + AddToWorklist(Op.getNode()); + SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT); + // Transfer the debug info; the new node is equivalent to N0. + DAG.transferDbgValues(N0, ZExtOrTrunc); + return ZExtOrTrunc; + } + } + + if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) { + SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT); + AddToWorklist(Op.getNode()); + SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + // We may safely transfer the debug info describing the truncate node over + // to the equivalent and operation. + DAG.transferDbgValues(N0, And); + return And; + } + } + + // Fold (zext (and (trunc x), cst)) -> (and x, cst), + // if either of the casts is not free. + if (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant && + (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), + N0.getValueType()) || + !TLI.isZExtFree(N0.getValueType(), VT))) { + SDValue X = N0.getOperand(0).getOperand(0); + X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.zext(VT.getSizeInBits()); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + X, DAG.getConstant(Mask, DL, VT)); + } + + // Try to simplify (zext (load x)). + if (SDValue foldedExt = + tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0, + ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) + return foldedExt; + + if (SDValue foldedExt = + tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD, + ISD::ZERO_EXTEND)) + return foldedExt; + + // fold (zext (load x)) to multiple smaller zextloads. + // Only on illegal but splittable vectors. + if (SDValue ExtLoad = CombineExtLoad(N)) + return ExtLoad; + + // fold (zext (and/or/xor (load x), cst)) -> + // (and/or/xor (zextload x), (zext cst)) + // Unless (and (load x) cst) will match as a zextload already and has + // additional users. 
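+  // e.g. (zext (and (load x), 7) to i32) -> (and (zextload x to i32), 7)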
+  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+       N0.getOpcode() == ISD::XOR) &&
+      isa<LoadSDNode>(N0.getOperand(0)) &&
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
+    EVT MemVT = LN00->getMemoryVT();
+    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
+        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
+      bool DoXform = true;
+      SmallVector<SDNode*, 4> SetCCs;
+      if (!N0.hasOneUse()) {
+        if (N0.getOpcode() == ISD::AND) {
+          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
+          EVT LoadResultTy = AndC->getValueType(0);
+          EVT ExtVT;
+          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
+            DoXform = false;
+        }
+      }
+      if (DoXform)
+        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
+                                          ISD::ZERO_EXTEND, SetCCs, TLI);
+      if (DoXform) {
+        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
+                                         LN00->getChain(), LN00->getBasePtr(),
+                                         LN00->getMemoryVT(),
+                                         LN00->getMemOperand());
+        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+        Mask = Mask.zext(VT.getSizeInBits());
+        SDLoc DL(N);
+        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
+                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
+        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
+        bool NoReplaceTruncAnd = !N0.hasOneUse();
+        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
+        CombineTo(N, And);
+        // If N0 has multiple uses, change other uses as well.
+        if (NoReplaceTruncAnd) {
+          SDValue TruncAnd =
+              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
+          CombineTo(N0.getNode(), TruncAnd);
+        }
+        if (NoReplaceTrunc) {
+          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
+        } else {
+          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
+                                      LN00->getValueType(0), ExtLoad);
+          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
+        }
+        return SDValue(N,0); // Return N so it doesn't get rechecked!
+      }
+    }
+  }
+
+  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
+  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
+    return ZExtLoad;
+
+  // Try to simplify (zext (zextload x)).
+  if (SDValue foldedExt = tryToFoldExtOfExtload(
+          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
+    return foldedExt;
+
+  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
+    return V;
+
+  if (N0.getOpcode() == ISD::SETCC) {
+    // Only do this before legalize for now.
+    if (!LegalOperations && VT.isVector() &&
+        N0.getValueType().getVectorElementType() == MVT::i1) {
+      EVT N00VT = N0.getOperand(0).getValueType();
+      if (getSetCCResultType(N00VT) == N0.getValueType())
+        return SDValue();
+
+      // We know that the # elements of the results is the same as the #
+      // elements of the compare (and the # elements of the compare result for
+      // that matter). Check to see that they are the same size. If so, we know
+      // that the element size of the zext'd result matches the element size of
+      // the compare operands.
+      SDLoc DL(N);
+      SDValue VecOnes = DAG.getConstant(1, DL, VT);
+      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
+        // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
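+        // ANDing with a splat of 1 normalizes whatever "true" value the
+        // vector setcc produces down to the 0/1 lanes required by zext.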
+ SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0), + N0.getOperand(1), N0.getOperand(2)); + return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes); + } + + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/sign extend. + EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger(); + SDValue VsetCC = + DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), N0.getOperand(2)); + return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT), + VecOnes); + } + + // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + SDLoc DL(N); + if (SDValue SCC = SimplifySelectCC( + DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT), + DAG.getConstant(0, DL, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true)) + return SCC; + } + + // (zext (shl (zext x), cst)) -> (shl (zext x), cst) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && + isa<ConstantSDNode>(N0.getOperand(1)) && + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && + N0.hasOneUse()) { + SDValue ShAmt = N0.getOperand(1); + if (N0.getOpcode() == ISD::SHL) { + SDValue InnerZExt = N0.getOperand(0); + // If the original shl may be shifting out bits, do not perform this + // transformation. + unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() - + InnerZExt.getOperand(0).getValueSizeInBits(); + if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits)) + return SDValue(); + } + + SDLoc DL(N); + + // Ensure that the shift amount is wide enough for the shifted value. + if (VT.getSizeInBits() >= 256) + ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); + + return DAG.getNode(N0.getOpcode(), DL, VT, + DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), + ShAmt); + } + + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + + return SDValue(); +} + +SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) + return Res; + + // fold (aext (aext x)) -> (aext x) + // fold (aext (zext x)) -> (zext x) + // fold (aext (sext x)) -> (sext x) + if (N0.getOpcode() == ISD::ANY_EXTEND || + N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND) + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); + + // fold (aext (truncate (load x))) -> (aext (smaller load x)) + // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) + if (N0.getOpcode() == ISD::TRUNCATE) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { + SDNode *oye = N0.getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { + CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorklist(oye); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (aext (truncate x)) + if (N0.getOpcode() == ISD::TRUNCATE) + return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT); + + // Fold (aext (and (trunc x), cst)) -> (and x, cst) + // if the trunc is not free. 
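+  // e.g. (aext (and (trunc i64 %x to i32), 255) to i64) -> (and i64 %x, 255)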
+ if (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant && + !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), + N0.getValueType())) { + SDLoc DL(N); + SDValue X = N0.getOperand(0).getOperand(0); + X = DAG.getAnyExtOrTrunc(X, DL, VT); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.zext(VT.getSizeInBits()); + return DAG.getNode(ISD::AND, DL, VT, + X, DAG.getConstant(Mask, DL, VT)); + } + + // fold (aext (load x)) -> (aext (truncate (extload x))) + // None of the supported targets knows how to perform load and any_ext + // on vectors in one instruction. We only perform this transformation on + // scalars. + if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && + ISD::isUNINDEXEDLoad(N0.getNode()) && + TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, + TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, + LN0->getChain(), + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); + ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND); + // If the load value is used only by N, replace it via CombineTo N. + bool NoReplaceTrunc = N0.hasOneUse(); + CombineTo(N, ExtLoad); + if (NoReplaceTrunc) { + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + recursivelyDeleteUnusedNodes(LN0); + } else { + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), + N0.getValueType(), ExtLoad); + CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (aext (zextload x)) -> (aext (truncate (zextload x))) + // fold (aext (sextload x)) -> (aext (truncate (sextload x))) + // fold (aext ( extload x)) -> (aext (truncate (extload x))) + if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) && + ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + ISD::LoadExtType ExtType = LN0->getExtensionType(); + EVT MemVT = LN0->getMemoryVT(); + if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) { + SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N), + VT, LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + CombineTo(N, ExtLoad); + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + recursivelyDeleteUnusedNodes(LN0); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + if (N0.getOpcode() == ISD::SETCC) { + // For vectors: + // aext(setcc) -> vsetcc + // aext(setcc) -> truncate(vsetcc) + // aext(setcc) -> aext(vsetcc) + // Only do this before legalize for now. + if (VT.isVector() && !LegalOperations) { + EVT N00VT = N0.getOperand(0).getValueType(); + if (getSetCCResultType(N00VT) == N0.getValueType()) + return SDValue(); + + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
+ if (VT.getSizeInBits() == N00VT.getSizeInBits()) + return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/any extend + EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger(); + SDValue VsetCC = + DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT); + } + + // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + SDLoc DL(N); + if (SDValue SCC = SimplifySelectCC( + DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT), + DAG.getConstant(0, DL, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true)) + return SCC; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitAssertExt(SDNode *N) { + unsigned Opcode = N->getOpcode(); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT AssertVT = cast<VTSDNode>(N1)->getVT(); + + // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt) + if (N0.getOpcode() == Opcode && + AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT()) + return N0; + + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == Opcode) { + // We have an assert, truncate, assert sandwich. Make one stronger assert + // by asserting on the smallest asserted type to the larger source type. + // This eliminates the later assert: + // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN + // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN + SDValue BigA = N0.getOperand(0); + EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT(); + assert(BigA_AssertVT.bitsLE(N0.getValueType()) && + "Asserting zero/sign-extended bits to a type larger than the " + "truncated destination does not provide information"); + + SDLoc DL(N); + EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT; + SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT); + SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(), + BigA.getOperand(0), MinAssertVTVal); + return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert); + } + + // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller + // than X. Just move the AssertZext in front of the truncate and drop the + // AssertSExt. + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == ISD::AssertSext && + Opcode == ISD::AssertZext) { + SDValue BigA = N0.getOperand(0); + EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT(); + assert(BigA_AssertVT.bitsLE(N0.getValueType()) && + "Asserting zero/sign-extended bits to a type larger than the " + "truncated destination does not provide information"); + + if (AssertVT.bitsLT(BigA_AssertVT)) { + SDLoc DL(N); + SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(), + BigA.getOperand(0), N1); + return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert); + } + } + + return SDValue(); +} + +/// If the result of a wider load is shifted to right of N bits and then +/// truncated to a narrower type and where N is a multiple of number of bits of +/// the narrower type, transform it to a narrower load from address + N / num of +/// bits of new type. 
Also narrow the load if the result is masked with an AND
+/// to effectively produce a smaller type. If the result is to be extended,
+/// also fold the extension to form an extending load.
+SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+  unsigned Opc = N->getOpcode();
+
+  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  EVT ExtVT = VT;
+
+  // This transformation isn't valid for vector loads.
+  if (VT.isVector())
+    return SDValue();
+
+  unsigned ShAmt = 0;
+  bool HasShiftedOffset = false;
+  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
+  // extending to VT.
+  if (Opc == ISD::SIGN_EXTEND_INREG) {
+    ExtType = ISD::SEXTLOAD;
+    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  } else if (Opc == ISD::SRL) {
+    // Another special-case: SRL is basically zero-extending a narrower value,
+    // or it may be shifting a higher subword, half or byte into the lowest
+    // bits.
+    ExtType = ISD::ZEXTLOAD;
+    N0 = SDValue(N, 0);
+
+    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
+    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (!N01 || !LN0)
+      return SDValue();
+
+    uint64_t ShiftAmt = N01->getZExtValue();
+    uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
+    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
+      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
+    else
+      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+                                VT.getSizeInBits() - ShiftAmt);
+  } else if (Opc == ISD::AND) {
+    // An AND with a constant mask is the same as a truncate + zero-extend.
+    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (!AndC)
+      return SDValue();
+
+    const APInt &Mask = AndC->getAPIntValue();
+    unsigned ActiveBits = 0;
+    if (Mask.isMask()) {
+      ActiveBits = Mask.countTrailingOnes();
+    } else if (Mask.isShiftedMask()) {
+      ShAmt = Mask.countTrailingZeros();
+      APInt ShiftedMask = Mask.lshr(ShAmt);
+      ActiveBits = ShiftedMask.countTrailingOnes();
+      HasShiftedOffset = true;
+    } else
+      return SDValue();
+
+    ExtType = ISD::ZEXTLOAD;
+    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+  }
+
+  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+    SDValue SRL = N0;
+    if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
+      ShAmt = ConstShift->getZExtValue();
+      unsigned EVTBits = ExtVT.getSizeInBits();
+      // Is the shift amount a multiple of the size of ExtVT?
+      if ((ShAmt & (EVTBits-1)) == 0) {
+        N0 = N0.getOperand(0);
+        // Is the load width a multiple of the size of ExtVT?
+        if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
+          return SDValue();
+      }
+
+      // At this point, we must have a load or else we can't do the transform.
+      if (!isa<LoadSDNode>(N0)) return SDValue();
+
+      auto *LN0 = cast<LoadSDNode>(N0);
+
+      // Because an SRL must be assumed to *need* to zero-extend the high bits
+      // (as opposed to anyext the high bits), we can't combine the zextload
+      // lowering of SRL and an sextload.
+      if (LN0->getExtensionType() == ISD::SEXTLOAD)
+        return SDValue();
+
+      // If the shift amount is larger than the input type then we're not
+      // accessing any of the loaded bytes. If the load was a zextload/extload
+      // then the result of the shift+trunc is zero/undef (handled elsewhere).
+      if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
+        return SDValue();
+
+      // If the SRL is only used by a masking AND, we may be able to adjust
+      // the ExtVT to make the AND redundant.
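+      // e.g. if (srl (load i32 x), 16) is only used by (and _, 255), the
+      // i16 ExtVT can shrink to i8: the 8-bit zextload already clears the
+      // bits the AND would mask off.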
+ SDNode *Mask = *(SRL->use_begin()); + if (Mask->getOpcode() == ISD::AND && + isa<ConstantSDNode>(Mask->getOperand(1))) { + const APInt &ShiftMask = + cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue(); + if (ShiftMask.isMask()) { + EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), + ShiftMask.countTrailingOnes()); + // If the mask is smaller, recompute the type. + if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) && + TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT)) + ExtVT = MaskedVT; + } + } + } + } + + // If the load is shifted left (and the result isn't shifted back right), + // we can fold the truncate through the shift. + unsigned ShLeftAmt = 0; + if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() && + ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) { + if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + ShLeftAmt = N01->getZExtValue(); + N0 = N0.getOperand(0); + } + } + + // If we haven't found a load, we can't narrow it. + if (!isa<LoadSDNode>(N0)) + return SDValue(); + + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + // Reducing the width of a volatile load is illegal. For atomics, we may be + // able to reduce the width provided we never widen again. (see D66309) + if (!LN0->isSimple() || + !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) + return SDValue(); + + auto AdjustBigEndianShift = [&](unsigned ShAmt) { + unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); + unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); + return LVTStoreBits - EVTStoreBits - ShAmt; + }; + + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. + if (DAG.getDataLayout().isBigEndian()) + ShAmt = AdjustBigEndianShift(ShAmt); + + EVT PtrType = N0.getOperand(1).getValueType(); + uint64_t PtrOff = ShAmt / 8; + unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); + SDLoc DL(LN0); + // The original load itself didn't wrap, so an offset within it doesn't. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + SDValue NewPtr = DAG.getNode(ISD::ADD, DL, + PtrType, LN0->getBasePtr(), + DAG.getConstant(PtrOff, DL, PtrType), + Flags); + AddToWorklist(NewPtr.getNode()); + + SDValue Load; + if (ExtType == ISD::NON_EXTLOAD) + Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign, + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); + else + Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, + NewAlign, LN0->getMemOperand()->getFlags(), + LN0->getAAInfo()); + + // Replace the old load's chain with the new load's chain. + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); + + // Shift the result left, if we've swallowed a left shift. + SDValue Result = Load; + if (ShLeftAmt != 0) { + EVT ShImmTy = getShiftAmountTy(Result.getValueType()); + if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) + ShImmTy = VT; + // If the shift amount is as large as the result size (but, presumably, + // no larger than the source) then the useful bits of the result are + // zero; we can't simply return the shortened shift, because the result + // of that operation is undefined. 
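+  // e.g. when narrowing to i16, a left shift by 16 or more leaves no
+  // useful bits, so the result is folded to the constant 0 below.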
+  if (ShLeftAmt >= VT.getSizeInBits())
+    Result = DAG.getConstant(0, DL, VT);
+  else
+    Result = DAG.getNode(ISD::SHL, DL, VT,
+                         Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
+
+  if (HasShiftedOffset) {
+    // Recalculate the shift amount after it has been altered to calculate
+    // the offset.
+    if (DAG.getDataLayout().isBigEndian())
+      ShAmt = AdjustBigEndianShift(ShAmt);
+
+    // We're using a shifted mask, so the load now has an offset. This means
+    // that the data has been loaded into lower bytes than it would have been
+    // before, so we need to shl the loaded data into the correct position in
+    // the register.
+    SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
+    Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+  }
+
+  // Return the new loaded value.
+  return Result;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N->getValueType(0);
+  EVT EVT = cast<VTSDNode>(N1)->getVT();
+  unsigned VTBits = VT.getScalarSizeInBits();
+  unsigned EVTBits = EVT.getScalarSizeInBits();
+
+  if (N0.isUndef())
+    return DAG.getUNDEF(VT);
+
+  // fold (sext_in_reg c1) -> c1
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
+
+  // If the input is already sign extended, just drop the extension.
+  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
+    return N0;
+
+  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
+  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
+                       N0.getOperand(0), N1);
+
+  // fold (sext_in_reg (sext x)) -> (sext x)
+  // fold (sext_in_reg (aext x)) -> (sext x)
+  // if x is small enough or if we know that x has more than 1 sign bit and the
+  // sign_extend_inreg is extending from one of them.
+  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
+    SDValue N00 = N0.getOperand(0);
+    unsigned N00Bits = N00.getScalarValueSizeInBits();
+    if ((N00Bits <= EVTBits ||
+         (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
+        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
+      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
+  }
+
+  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
+  if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
+       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+      N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
+    if (!LegalOperations ||
+        TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
+      return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
+                         N0.getOperand(0));
+  }
+
+  // fold (sext_in_reg (zext x)) -> (sext x)
+  // iff we are extending the source sign bit.
+  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+    SDValue N00 = N0.getOperand(0);
+    if (N00.getScalarValueSizeInBits() == EVTBits &&
+        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
+      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
+  }
+
+  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
+  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
+    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
+
+  // fold operands of sext_in_reg based on knowledge that the top bits are not
+  // demanded.
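+  // e.g. in (sext_in_reg (or x, 0xFF00), i8) only the low 8 bits of the
+  // OR are demanded, so it can be simplified to (sext_in_reg x, i8).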
+ if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (sext_in_reg (load x)) -> (smaller sextload x) + // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) + if (SDValue NarrowLoad = ReduceLoadWidth(N)) + return NarrowLoad; + + // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) + // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. + // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. + if (N0.getOpcode() == ISD::SRL) { + if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) { + // We can turn this into an SRA iff the input to the SRL is already sign + // extended enough. + unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); + if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits) + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), + N0.getOperand(1)); + } + } + + // fold (sext_inreg (extload x)) -> (sextload x) + // If sextload is not supported by target, we can only do the combine when + // load has one use. Doing otherwise can block folding the extload with other + // extends that the target does support. + if (ISD::isEXTLoad(N0.getNode()) && + ISD::isUNINDEXEDLoad(N0.getNode()) && + EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() && + N0.hasOneUse()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, + LN0->getChain(), + LN0->getBasePtr(), EVT, + LN0->getMemOperand()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + AddToWorklist(ExtLoad.getNode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use + if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse() && + EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, + LN0->getChain(), + LN0->getBasePtr(), EVT, + LN0->getMemOperand()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+  }
+
+  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
+  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
+    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+                                           N0.getOperand(1), false))
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
+                         BSwap, N1);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  if (N0.isUndef())
+    return DAG.getUNDEF(VT);
+
+  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+    return Res;
+
+  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  if (N0.isUndef())
+    return DAG.getUNDEF(VT);
+
+  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+    return Res;
+
+  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  EVT SrcVT = N0.getValueType();
+  bool isLE = DAG.getDataLayout().isLittleEndian();
+
+  // noop truncate
+  if (SrcVT == VT)
+    return N0;
+
+  // fold (truncate (truncate x)) -> (truncate x)
+  if (N0.getOpcode() == ISD::TRUNCATE)
+    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
+
+  // fold (truncate c1) -> c1
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
+    SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
+    if (C.getNode() != N)
+      return C;
+  }
+
+  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
+      N0.getOpcode() == ISD::SIGN_EXTEND ||
+      N0.getOpcode() == ISD::ANY_EXTEND) {
+    // if the source is smaller than the dest, we still need an extend.
+    if (N0.getOperand(0).getValueType().bitsLT(VT))
+      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+    // if the source is larger than the dest, then we just need the truncate.
+    if (N0.getOperand(0).getValueType().bitsGT(VT))
+      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
+    // if the source and dest are the same type, we can drop both the extend
+    // and the truncate.
+    return N0.getOperand(0);
+  }
+
+  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
+  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
+    return SDValue();
+
+  // Fold extract-and-trunc into a narrow extract. For example:
+  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
+  //   i32 y = TRUNCATE(i64 x)
+  //        -- becomes --
+  //   v16i8 b = BITCAST (v2i64 val)
+  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
+  //
+  // Note: We only run this optimization after type legalization (which often
+  // creates this pattern) and before operation legalization after which
+  // we need to be more careful about the vector instructions that we generate.
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) { + EVT VecTy = N0.getOperand(0).getValueType(); + EVT ExTy = N0.getValueType(); + EVT TrTy = N->getValueType(0); + + unsigned NumElem = VecTy.getVectorNumElements(); + unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); + assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); + + SDValue EltNo = N0->getOperand(1); + if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); + + SDLoc DL(N); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy, + DAG.getBitcast(NVT, N0.getOperand(0)), + DAG.getConstant(Index, DL, IndexTy)); + } + } + + // trunc (select c, a, b) -> select c, (trunc a), (trunc b) + if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) { + if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && + TLI.isTruncateFree(SrcVT, VT)) { + SDLoc SL(N0); + SDValue Cond = N0.getOperand(0); + SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); + SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2)); + return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1); + } + } + + // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits() + if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && + (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) && + TLI.isTypeDesirableForOp(ISD::SHL, VT)) { + SDValue Amt = N0.getOperand(1); + KnownBits Known = DAG.computeKnownBits(Amt); + unsigned Size = VT.getScalarSizeInBits(); + if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { + SDLoc SL(N); + EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); + if (AmtVT != Amt.getValueType()) { + Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT); + AddToWorklist(Amt.getNode()); + } + return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt); + } + } + + // Attempt to pre-truncate BUILD_VECTOR sources. + if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations && + TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) { + SDLoc DL(N); + EVT SVT = VT.getScalarType(); + SmallVector<SDValue, 8> TruncOps; + for (const SDValue &Op : N0->op_values()) { + SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op); + TruncOps.push_back(TruncOp); + } + return DAG.getBuildVector(VT, DL, TruncOps); + } + + // Fold a series of buildvector, bitcast, and truncate if possible. + // For example fold + // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to + // (2xi32 (buildvector x, y)). + if (Level == AfterLegalizeVectorOps && VT.isVector() && + N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && + N0.getOperand(0).hasOneUse()) { + SDValue BuildVect = N0.getOperand(0); + EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType(); + EVT TruncVecEltTy = VT.getVectorElementType(); + + // Check that the element types match. + if (BuildVectEltTy == TruncVecEltTy) { + // Now we only need to compute the offset of the truncated elements. 
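+      // e.g. when going from 4 elements to 2 the stride is 2, so operands
+      // 0 and 2 of the build_vector are the ones that survive the truncate.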
+      unsigned BuildVecNumElts = BuildVect.getNumOperands();
+      unsigned TruncVecNumElts = VT.getVectorNumElements();
+      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
+
+      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
+             "Invalid number of elements");
+
+      SmallVector<SDValue, 8> Opnds;
+      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
+        Opnds.push_back(BuildVect.getOperand(i));
+
+      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
+    }
+  }
+
+  // See if we can simplify the input to this truncate through knowledge that
+  // only the low bits are being used.
+  // For example "trunc (or (shl x, 8), y)" -> trunc y
+  // Currently we only perform this optimization on scalars because vectors
+  // may have different active low bits.
+  if (!VT.isVector()) {
+    APInt Mask =
+        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
+    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
+      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
+  }
+
+  // fold (truncate (load x)) -> (smaller load x)
+  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
+    if (SDValue Reduced = ReduceLoadWidth(N))
+      return Reduced;
+
+    // Handle the case where the load remains an extending load even
+    // after truncation.
+    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
+      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+      if (LN0->isSimple() &&
+          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
+        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
+                                         VT, LN0->getChain(), LN0->getBasePtr(),
+                                         LN0->getMemoryVT(),
+                                         LN0->getMemOperand());
+        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
+        return NewLoad;
+      }
+    }
+  }
+
+  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
+  // where ... are all 'undef'.
+  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
+    SmallVector<EVT, 8> VTs;
+    SDValue V;
+    unsigned Idx = 0;
+    unsigned NumDefs = 0;
+
+    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+      SDValue X = N0.getOperand(i);
+      if (!X.isUndef()) {
+        V = X;
+        Idx = i;
+        NumDefs++;
+      }
+      // Stop if more than one member is non-undef.
+      if (NumDefs > 1)
+        break;
+      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
+                                     VT.getVectorElementType(),
+                                     X.getValueType().getVectorNumElements()));
+    }
+
+    if (NumDefs == 0)
+      return DAG.getUNDEF(VT);
+
+    if (NumDefs == 1) {
+      assert(V.getNode() && "The single defined operand is empty!");
+      SmallVector<SDValue, 8> Opnds;
+      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+        if (i != Idx) {
+          Opnds.push_back(DAG.getUNDEF(VTs[i]));
+          continue;
+        }
+        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
+        AddToWorklist(NV.getNode());
+        Opnds.push_back(NV);
+      }
+      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
+    }
+  }
+
+  // Fold truncate of a bitcast of a vector to an extract of the low vector
+  // element.
+  //
+  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
+  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
+    SDValue VecSrc = N0.getOperand(0);
+    EVT SrcVT = VecSrc.getValueType();
+    if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
+        (!LegalOperations ||
+         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
+      SDLoc SL(N);
+
+      EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+      unsigned Idx = isLE ?
0 : SrcVT.getVectorNumElements() - 1; + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, + VecSrc, DAG.getConstant(Idx, SL, IdxVT)); + } + } + + // Simplify the operands using demanded-bits information. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry) + // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry) + // When the adde's carry is not used. + if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) && + N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) && + // We only do for addcarry before legalize operation + ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) || + TLI.isOperationLegal(N0.getOpcode(), VT))) { + SDLoc SL(N); + auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); + auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); + auto VTs = DAG.getVTList(VT, N0->getValueType(1)); + return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2)); + } + + // fold (truncate (extract_subvector(ext x))) -> + // (extract_subvector x) + // TODO: This can be generalized to cover cases where the truncate and extract + // do not fully cancel each other out. + if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::SIGN_EXTEND || + N00.getOpcode() == ISD::ZERO_EXTEND || + N00.getOpcode() == ISD::ANY_EXTEND) { + if (N00.getOperand(0)->getValueType(0).getVectorElementType() == + VT.getVectorElementType()) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT, + N00.getOperand(0), N0.getOperand(1)); + } + } + + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + + // Narrow a suitable binary operation with a non-opaque constant operand by + // moving it ahead of the truncate. This is limited to pre-legalization + // because targets may prefer a wider type during later combines and invert + // this transform. + switch (N0.getOpcode()) { + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + if (!LegalOperations && N0.hasOneUse() && + (isConstantOrConstantVector(N0.getOperand(0), true) || + isConstantOrConstantVector(N0.getOperand(1), true))) { + // TODO: We already restricted this to pre-legalization, but for vectors + // we are extra cautious to not create an unsupported operation. + // Target-specific changes are likely needed to avoid regressions here. + if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) { + SDLoc DL(N); + SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0)); + SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1)); + return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR); + } + } + } + + return SDValue(); +} + +static SDNode *getBuildPairElt(SDNode *N, unsigned i) { + SDValue Elt = N->getOperand(i); + if (Elt.getOpcode() != ISD::MERGE_VALUES) + return Elt.getNode(); + return Elt.getOperand(Elt.getResNo()).getNode(); +} + +/// build_pair (load, load) -> load +/// if load locations are consecutive. +SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { + assert(N->getOpcode() == ISD::BUILD_PAIR); + + LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); + LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); + + // A BUILD_PAIR is always having the least significant part in elt 0 and the + // most significant part in elt 1. 
So when combining into one large load, we + // need to consider the endianness. + if (DAG.getDataLayout().isBigEndian()) + std::swap(LD1, LD2); + + if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || + LD1->getAddressSpace() != LD2->getAddressSpace()) + return SDValue(); + EVT LD1VT = LD1->getValueType(0); + unsigned LD1Bytes = LD1VT.getStoreSize(); + if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && + DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { + unsigned Align = LD1->getAlignment(); + unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( + VT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign <= Align && + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) + return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), + LD1->getPointerInfo(), Align); + } + + return SDValue(); +} + +static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) { + // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi + // and Lo parts; on big-endian machines it doesn't. + return DAG.getDataLayout().isBigEndian() ? 1 : 0; +} + +static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, + const TargetLowering &TLI) { + // If this is not a bitcast to an FP type or if the target doesn't have + // IEEE754-compliant FP logic, we're done. + EVT VT = N->getValueType(0); + if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT)) + return SDValue(); + + // TODO: Handle cases where the integer constant is a different scalar + // bitwidth to the FP. + SDValue N0 = N->getOperand(0); + EVT SourceVT = N0.getValueType(); + if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits()) + return SDValue(); + + unsigned FPOpcode; + APInt SignMask; + switch (N0.getOpcode()) { + case ISD::AND: + FPOpcode = ISD::FABS; + SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits()); + break; + case ISD::XOR: + FPOpcode = ISD::FNEG; + SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits()); + break; + case ISD::OR: + FPOpcode = ISD::FABS; + SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits()); + break; + default: + return SDValue(); + } + + // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X + // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X + // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) -> + // fneg (fabs X) + SDValue LogicOp0 = N0.getOperand(0); + ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true); + if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask && + LogicOp0.getOpcode() == ISD::BITCAST && + LogicOp0.getOperand(0).getValueType() == VT) { + SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0)); + NumFPLogicOpsConv++; + if (N0.getOpcode() == ISD::OR) + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp); + return FPOp; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBITCAST(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (N0.isUndef()) + return DAG.getUNDEF(VT); + + // If the input is a BUILD_VECTOR with all constant elements, fold this now. + // Only do this before legalize types, unless both types are integer and the + // scalar type is legal. Only do this before legalize ops, since the target + // maybe depending on the bitcast. + // First check to see if this is all constant. + // TODO: Support FP bitcasts after legalize types. 
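+  // e.g. (v2i32 bitcast (v8i8 build_vector of constants)) becomes a v2i32
+  // build_vector of the repacked constant words.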
+  if (VT.isVector() &&
+      (!LegalTypes ||
+       (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
+        TLI.isTypeLegal(VT.getVectorElementType()))) &&
+      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
+      cast<BuildVectorSDNode>(N0)->isConstant())
+    return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
+                                             VT.getVectorElementType());
+
+  // If the input is a constant, let getNode fold it.
+  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+    // If we can't allow illegal operations, we need to check that this is just
+    // an fp -> int or int -> fp conversion and that the resulting operation
+    // will be legal.
+    if (!LegalOperations ||
+        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
+         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
+        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
+         TLI.isOperationLegal(ISD::Constant, VT))) {
+      SDValue C = DAG.getBitcast(VT, N0);
+      if (C.getNode() != N)
+        return C;
+    }
+  }
+
+  // (conv (conv x, t1), t2) -> (conv x, t2)
+  if (N0.getOpcode() == ISD::BITCAST)
+    return DAG.getBitcast(VT, N0.getOperand(0));
+
+  // fold (conv (load x)) -> (load (conv*)x)
+  // If the resultant load doesn't need a higher alignment than the original!
+  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+      // Do not remove the cast if the types differ in endian layout.
+      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
+          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
+      // If the load is volatile, we only want to change the load type if the
+      // resulting load is legal. Otherwise we might increase the number of
+      // memory accesses. We don't care if the original type was legal or not
+      // as we assume software couldn't rely on the number of accesses of an
+      // illegal type.
+      ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
+       TLI.isOperationLegal(ISD::LOAD, VT))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+
+    if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
+                                    *LN0->getMemOperand())) {
+      SDValue Load =
+          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
+                      LN0->getPointerInfo(), LN0->getAlignment(),
+                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
+      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
+      return Load;
+    }
+  }
+
+  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
+    return V;
+
+  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+  //
+  // For ppc_fp128:
+  // fold (bitcast (fneg x)) ->
+  //     flipbit = signbit
+  //     (xor (bitcast x) (build_pair flipbit, flipbit))
+  //
+  // fold (bitcast (fabs x)) ->
+  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
+  //     (xor (bitcast x) (build_pair flipbit, flipbit))
+  // This often reduces constant pool loads.
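+  // e.g. (i32 bitcast (fneg f32 %x)) -> (xor (i32 bitcast %x), 0x80000000)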
+ if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || + (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && + N0.getNode()->hasOneUse() && VT.isInteger() && + !VT.isVector() && !N0.getValueType().isVector()) { + SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0)); + AddToWorklist(NewConv.getNode()); + + SDLoc DL(N); + if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { + assert(VT.getSizeInBits() == 128); + SDValue SignBit = DAG.getConstant( + APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); + SDValue FlipBit; + if (N0.getOpcode() == ISD::FNEG) { + FlipBit = SignBit; + AddToWorklist(FlipBit.getNode()); + } else { + assert(N0.getOpcode() == ISD::FABS); + SDValue Hi = + DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv, + DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), + SDLoc(NewConv))); + AddToWorklist(Hi.getNode()); + FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit); + AddToWorklist(FlipBit.getNode()); + } + SDValue FlipBits = + DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); + AddToWorklist(FlipBits.getNode()); + return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); + } + APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); + if (N0.getOpcode() == ISD::FNEG) + return DAG.getNode(ISD::XOR, DL, VT, + NewConv, DAG.getConstant(SignBit, DL, VT)); + assert(N0.getOpcode() == ISD::FABS); + return DAG.getNode(ISD::AND, DL, VT, + NewConv, DAG.getConstant(~SignBit, DL, VT)); + } + + // fold (bitconvert (fcopysign cst, x)) -> + // (or (and (bitconvert x), sign), (and cst, (not sign))) + // Note that we don't handle (copysign x, cst) because this can always be + // folded to an fneg or fabs. + // + // For ppc_fp128: + // fold (bitcast (fcopysign cst, x)) -> + // flipbit = (and (extract_element + // (xor (bitcast cst), (bitcast x)), 0), + // signbit) + // (xor (bitcast cst) (build_pair flipbit, flipbit)) + if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && + isa<ConstantFPSDNode>(N0.getOperand(0)) && + VT.isInteger() && !VT.isVector()) { + unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits(); + EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); + if (isTypeLegal(IntXVT)) { + SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1)); + AddToWorklist(X.getNode()); + + // If X has a different width than the result/lhs, sext it or truncate it. + unsigned VTWidth = VT.getSizeInBits(); + if (OrigXWidth < VTWidth) { + X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); + AddToWorklist(X.getNode()); + } else if (OrigXWidth > VTWidth) { + // To get the sign bit in the right place, we have to shift it right + // before truncating. 
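+        // e.g. with an f64 input and an i32 result, shift right by 32 so
+        // the sign bit lands in bit 31 of the truncated value.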
+ SDLoc DL(X); + X = DAG.getNode(ISD::SRL, DL, + X.getValueType(), X, + DAG.getConstant(OrigXWidth-VTWidth, DL, + X.getValueType())); + AddToWorklist(X.getNode()); + X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); + AddToWorklist(X.getNode()); + } + + if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { + APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2); + SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); + AddToWorklist(Cst.getNode()); + SDValue X = DAG.getBitcast(VT, N0.getOperand(1)); + AddToWorklist(X.getNode()); + SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X); + AddToWorklist(XorResult.getNode()); + SDValue XorResult64 = DAG.getNode( + ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult, + DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), + SDLoc(XorResult))); + AddToWorklist(XorResult64.getNode()); + SDValue FlipBit = + DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64, + DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64)); + AddToWorklist(FlipBit.getNode()); + SDValue FlipBits = + DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); + AddToWorklist(FlipBits.getNode()); + return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); + } + APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); + X = DAG.getNode(ISD::AND, SDLoc(X), VT, + X, DAG.getConstant(SignBit, SDLoc(X), VT)); + AddToWorklist(X.getNode()); + + SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); + Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, + Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); + AddToWorklist(Cst.getNode()); + + return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); + } + } + + // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. + if (N0.getOpcode() == ISD::BUILD_PAIR) + if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT)) + return CombineLD; + + // Remove double bitcasts from shuffles - this is often a legacy of + // XformToShuffleWithZero being used to combine bitmaskings (of + // float vectors bitcast to integer vectors) into shuffles. + // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1) + if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() && + N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() && + VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() && + !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0); + + // If operands are a bitcast, peek through if it casts the original VT. + // If operands are a constant, just bitcast back to original VT. + auto PeekThroughBitcast = [&](SDValue Op) { + if (Op.getOpcode() == ISD::BITCAST && + Op.getOperand(0).getValueType() == VT) + return SDValue(Op.getOperand(0)); + if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || + ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) + return DAG.getBitcast(VT, Op); + return SDValue(); + }; + + // FIXME: If either input vector is bitcast, try to convert the shuffle to + // the result type of this bitcast. This would eliminate at least one + // bitcast. See the transform in InstCombine. + SDValue SV0 = PeekThroughBitcast(N0->getOperand(0)); + SDValue SV1 = PeekThroughBitcast(N0->getOperand(1)); + if (!(SV0 && SV1)) + return SDValue(); + + int MaskScale = + VT.getVectorNumElements() / N0.getValueType().getVectorNumElements(); + SmallVector<int, 8> NewMask; + for (int M : SVN->getMask()) + for (int i = 0; i != MaskScale; ++i) + NewMask.push_back(M < 0 ? 
-1 : M * MaskScale + i);
+
+    SDValue LegalShuffle =
+        TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
+    if (LegalShuffle)
+      return LegalShuffle;
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  return CombineConsecutiveLoads(N, VT);
+}
+
+/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
+/// operands. DstEltVT indicates the destination element value type.
+SDValue DAGCombiner::
+ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+
+  // If this is already the right type, we're done.
+  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
+
+  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
+  unsigned DstBitSize = DstEltVT.getSizeInBits();
+
+  // If this is a conversion of N elements of one type to N elements of another
+  // type, convert each element. This handles FP<->INT cases.
+  if (SrcBitSize == DstBitSize) {
+    SmallVector<SDValue, 8> Ops;
+    for (SDValue Op : BV->op_values()) {
+      // If the vector element type is not legal, the BUILD_VECTOR operands
+      // are promoted and implicitly truncated. Make that explicit here.
+      if (Op.getValueType() != SrcEltVT)
+        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
+      Ops.push_back(DAG.getBitcast(DstEltVT, Op));
+      AddToWorklist(Ops.back().getNode());
+    }
+    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+                              BV->getValueType(0).getVectorNumElements());
+    return DAG.getBuildVector(VT, SDLoc(BV), Ops);
+  }
+
+  // Otherwise, we're growing or shrinking the elements. To avoid having to
+  // handle annoying details of growing/shrinking FP values, we convert them to
+  // int first.
+  if (SrcEltVT.isFloatingPoint()) {
+    // Convert the input float vector to an int vector whose elements are the
+    // same size.
+    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
+    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
+    SrcEltVT = IntVT;
+  }
+
+  // Now we know the input is an integer vector. If the output is a FP type,
+  // convert to integer first, then to FP of the right size.
+  if (DstEltVT.isFloatingPoint()) {
+    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
+    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
+
+    // Next, convert to FP elements of the same size.
+    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
+  }
+
+  SDLoc DL(BV);
+
+  // Okay, we know the src/dst types are both integers of differing types.
+  // Handle growing first.
+  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
+  if (SrcBitSize < DstBitSize) {
+    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+         i += NumInputsPerOutput) {
+      bool isLE = DAG.getDataLayout().isLittleEndian();
+      APInt NewBits = APInt(DstBitSize, 0);
+      bool EltIsUndef = true;
+      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+        // Shift the previously computed bits over.
+        NewBits <<= SrcBitSize;
+        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
+        if (Op.isUndef()) continue;
+        EltIsUndef = false;
+
+        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
+ zextOrTrunc(SrcBitSize).zext(DstBitSize); + } + + if (EltIsUndef) + Ops.push_back(DAG.getUNDEF(DstEltVT)); + else + Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); + } + + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); + return DAG.getBuildVector(VT, DL, Ops); + } + + // Finally, this must be the case where we are shrinking elements: each input + // turns into multiple outputs. + unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, + NumOutputsPerInput*BV->getNumOperands()); + SmallVector<SDValue, 8> Ops; + + for (const SDValue &Op : BV->op_values()) { + if (Op.isUndef()) { + Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); + continue; + } + + APInt OpVal = cast<ConstantSDNode>(Op)-> + getAPIntValue().zextOrTrunc(SrcBitSize); + + for (unsigned j = 0; j != NumOutputsPerInput; ++j) { + APInt ThisVal = OpVal.trunc(DstBitSize); + Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); + OpVal.lshrInPlace(DstBitSize); + } + + // For big endian targets, swap the order of the pieces of each element. + if (DAG.getDataLayout().isBigEndian()) + std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); + } + + return DAG.getBuildVector(VT, DL, Ops); +} + +static bool isContractable(SDNode *N) { + SDNodeFlags F = N->getFlags(); + return F.hasAllowContract() || F.hasAllowReassociation(); +} + +/// Try to perform FMA combining on a given FADD node. +SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc SL(N); + + const TargetOptions &Options = DAG.getTarget().Options; + + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = + TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + SDNodeFlags Flags = N->getFlags(); + bool CanFuse = Options.UnsafeFPMath || isContractable(N); + bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || + CanFuse || HasFMAD); + // If the addition is not contractable, do not combine. + if (!AllowFusionGlobally && !isContractable(N)) + return SDValue(); + + const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo(); + if (STI && STI->generateFMAsInMachineCombiner(OptLevel)) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + + // Is the node an FMUL and contractable either due to global flags or + // SDNodeFlags. + auto isContractableFMUL = [AllowFusionGlobally](SDValue N) { + if (N.getOpcode() != ISD::FMUL) + return false; + return AllowFusionGlobally || isContractable(N.getNode()); + }; + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. 
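+ // E.g. (an illustrative note): if (fmul x, y) has other users besides this + // fadd but (fmul u, v) does not, fusing (fmul u, v) lets that multiply + // disappear entirely, while fusing the busier one would leave its FMUL + // alive for the other users.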
+ if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) { + if (N0.getNode()->use_size() > N1.getNode()->use_size()) + std::swap(N0, N1); + } + + // fold (fadd (fmul x, y), z) -> (fma x, y, z) + if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), N1, Flags); + } + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) + // Note: Commutes FADD operands. + if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N1.getOperand(0), N1.getOperand(1), N0, Flags); + } + + // Look through FP_EXTEND nodes to do more combining. + + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (isContractableFMUL(N00) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), N1, Flags); + } + } + + // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (isContractableFMUL(N10) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), N0, Flags); + } + } + + // More folding opportunities when target permits. + if (Aggressive) { + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) + if (CanFuse && + N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL && + N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + N1, Flags), Flags); + } + + // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x)) + if (CanFuse && + N1->getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL && + N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N1.getOperand(0), N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + N1.getOperand(2).getOperand(0), + N1.getOperand(2).getOperand(1), + N0, Flags), Flags); + } + + + // fold (fadd (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + auto FoldFAddFMAFPExtFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z, + SDNodeFlags Flags) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z, Flags), Flags); + }; + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (isContractableFMUL(N020) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { + return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), + N020.getOperand(0), N020.getOperand(1), + N1, Flags); + } + } + } + + // fold (fadd (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), (fma
(fpext u), (fpext v), z)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + auto FoldFAddFPExtFMAFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z, + SDNodeFlags Flags) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, X), + DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z, Flags), Flags); + }; + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (isContractableFMUL(N002) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), + N002.getOperand(0), N002.getOperand(1), + N1, Flags); + } + } + } + + // fold (fadd x, (fma y, z, (fpext (fmul u, v)))) + // -> (fma y, z, (fma (fpext u), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode) { + SDValue N12 = N1.getOperand(2); + if (N12.getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N12.getOperand(0); + if (isContractableFMUL(N120) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) { + return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), + N120.getOperand(0), N120.getOperand(1), + N0, Flags); + } + } + } + + // fold (fadd x, (fpext (fma y, z, (fmul u, v)))) + // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == PreferredFusedOpcode) { + SDValue N102 = N10.getOperand(2); + if (isContractableFMUL(N102) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), + N102.getOperand(0), N102.getOperand(1), + N0, Flags); + } + } + } + } + + return SDValue(); +} + +/// Try to perform FMA combining on a given FSUB node. +SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc SL(N); + + const TargetOptions &Options = DAG.getTarget().Options; + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = + TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + const SDNodeFlags Flags = N->getFlags(); + bool CanFuse = Options.UnsafeFPMath || isContractable(N); + bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || + CanFuse || HasFMAD); + + // If the subtraction is not contractable, do not combine. + if (!AllowFusionGlobally && !isContractable(N)) + return SDValue(); + + const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo(); + if (STI && STI->generateFMAsInMachineCombiner(OptLevel)) + return SDValue(); + + // Always prefer FMAD to FMA for precision.
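+ // (An added gloss: FMAD rounds the intermediate product, so it reproduces + // the numerics of the separate FMUL+FADD; FMA rounds only the final + // result.)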
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + + // Is the node an FMUL and contractable either due to global flags or + // SDNodeFlags. + auto isContractableFMUL = [AllowFusionGlobally](SDValue N) { + if (N.getOpcode() != ISD::FMUL) + return false; + return AllowFusionGlobally || isContractable(N.getNode()); + }; + + // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) + if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FNEG, SL, VT, N1), Flags); + } + + // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) + // Note: Commutes FSUB operands. + if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + N1.getOperand(0)), + N1.getOperand(1), N0, Flags); + } + + // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) && + (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) { + SDValue N00 = N0.getOperand(0).getOperand(0); + SDValue N01 = N0.getOperand(0).getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N00), N01, + DAG.getNode(ISD::FNEG, SL, VT, N1), Flags); + } + + // Look through FP_EXTEND nodes to do more combining. + + // fold (fsub (fpext (fmul x, y)), z) + // -> (fma (fpext x), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (isContractableFMUL(N00) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, N1), Flags); + } + } + + // fold (fsub x, (fpext (fmul y, z))) + // -> (fma (fneg (fpext y)), (fpext z), x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (isContractableFMUL(N10) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), + N0, Flags); + } + } + + // fold (fsub (fpext (fneg (fmul x, y))), z) + // -> (fneg (fma (fpext x), (fpext y), z)) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us + // from implementing the canonicalization in visitFSUB.
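+ // The identity behind the next two folds: (-(x * y)) - z == -((x * y) + z), + // so the negation can be pulled outside the fused multiply-add.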
+ if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FNEG) { + SDValue N000 = N00.getOperand(0); + if (isContractableFMUL(N000) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1, Flags)); + } + } + } + + // fold (fsub (fneg (fpext (fmul x, y))), z) + // -> (fneg (fma (fpext x), (fpext y), z)) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FNEG) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FP_EXTEND) { + SDValue N000 = N00.getOperand(0); + if (isContractableFMUL(N000) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1, Flags)); + } + } + } + + // More folding opportunities when target permits. + if (Aggressive) { + // fold (fsub (fma x, y, (fmul u, v)), z) + // -> (fma x, y, (fma u, v, (fneg z))) + if (CanFuse && N0.getOpcode() == PreferredFusedOpcode && + isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() && + N0.getOperand(2)->hasOneUse()) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + DAG.getNode(ISD::FNEG, SL, VT, + N1), Flags), Flags); + } + + // fold (fsub x, (fma y, z, (fmul u, v))) + // -> (fma (fneg y), z, (fma (fneg u), v, x)) + if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && + isContractableFMUL(N1.getOperand(2))) { + SDValue N20 = N1.getOperand(2).getOperand(0); + SDValue N21 = N1.getOperand(2).getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N20), + N21, N0, Flags), Flags); + } + + + // fold (fsub (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z))) + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (isContractableFMUL(N020) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N020.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N020.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1), Flags), Flags); + } + } + } + + // fold (fsub (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), + // (fma (fpext u), (fpext v), (fneg z))) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs.
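+ // E.g. (illustrative), with f32 inputs extended to f64: an f32 FMUL and an + // f32 FMA plus one f64 FSUB become two f64 FMAs.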
+ if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (isContractableFMUL(N002) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N002.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N002.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1), Flags), Flags); + } + } + } + + // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) + // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N1.getOperand(2).getOperand(0); + if (isContractableFMUL(N120) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) { + SDValue N1200 = N120.getOperand(0); + SDValue N1201 = N120.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1200)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1201), + N0, Flags), Flags); + } + } + + // fold (fsub x, (fpext (fma y, z, (fmul u, v)))) + // -> (fma (fneg (fpext y)), (fpext z), + // (fma (fneg (fpext u)), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND && + N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { + SDValue CvtSrc = N1.getOperand(0); + SDValue N100 = CvtSrc.getOperand(0); + SDValue N101 = CvtSrc.getOperand(1); + SDValue N102 = CvtSrc.getOperand(2); + if (isContractableFMUL(N102) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) { + SDValue N1020 = N102.getOperand(0); + SDValue N1021 = N102.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N100)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1020)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1021), + N0, Flags), Flags); + } + } + } + + return SDValue(); +} + +/// Try to perform FMA combining on a given FMUL node based on the distributive +/// law x * (y + 1) = x * y + x and variants thereof (commuted versions, +/// subtraction instead of addition). +SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc SL(N); + const SDNodeFlags Flags = N->getFlags(); + + assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation"); + + const TargetOptions &Options = DAG.getTarget().Options; + + // The transforms below are incorrect when x == 0 and y == inf, because the + // intermediate multiplication produces a nan. + if (!Options.NoInfsFPMath) + return SDValue(); + + // Floating-point multiply-add without intermediate rounding. 
+ bool HasFMA = + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); + + // Floating-point multiply-add with intermediate rounding. This can result + // in a less precise result due to the changed rounding order. + bool HasFMAD = Options.UnsafeFPMath && + (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + + // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y) + // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y)) + auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) { + if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) { + if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) { + if (C->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + Y, Flags); + if (C->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); + } + } + return SDValue(); + }; + + if (SDValue FMA = FuseFADD(N0, N1, Flags)) + return FMA; + if (SDValue FMA = FuseFADD(N1, N0, Flags)) + return FMA; + + // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y) + // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y)) + // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y)) + // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y) + auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) { + if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) { + if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) { + if (C0->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, + Y, Flags); + if (C0->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); + } + if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) { + if (C1->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); + if (C1->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + Y, Flags); + } + } + return SDValue(); + }; + + if (SDValue FMA = FuseFSUB(N0, N1, Flags)) + return FMA; + if (SDValue FMA = FuseFSUB(N1, N0, Flags)) + return FMA; + + return SDValue(); +} + +SDValue DAGCombiner::visitFADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0); + bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); + EVT VT = N->getValueType(0); + SDLoc DL(N); + const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags Flags = N->getFlags(); + + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (fadd c1, c2) -> c1 + c2 + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags); + + // canonicalize constant to RHS + if (N0CFP && !N1CFP) + return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags); + + // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math) + ConstantFPSDNode *N1C = 
isConstOrConstSplatFP(N1, true); + if (N1C && N1C->isZero()) + if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) + return N0; + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + + // fold (fadd A, (fneg B)) -> (fsub A, B) + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && + TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2) + return DAG.getNode( + ISD::FSUB, DL, VT, N0, + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); + + // fold (fadd (fneg A), B) -> (fsub B, A) + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && + TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2) + return DAG.getNode( + ISD::FSUB, DL, VT, N1, + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags); + + auto isFMulNegTwo = [](SDValue FMul) { + if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL) + return false; + auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true); + return C && C->isExactlyValue(-2.0); + }; + + // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B) + if (isFMulNegTwo(N0)) { + SDValue B = N0.getOperand(0); + SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags); + return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags); + } + // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B) + if (isFMulNegTwo(N1)) { + SDValue B = N1.getOperand(0); + SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags); + return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags); + } + + // No FP constant should be created after legalization as Instruction + // Selection pass has a hard time dealing with FP constants. + bool AllowNewConst = (Level < AfterLegalizeDAG); + + // If nnan is enabled, fold lots of things. + if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) { + // If allowed, fold (fadd (fneg x), x) -> 0.0 + if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) + return DAG.getConstantFP(0.0, DL, VT); + + // If allowed, fold (fadd x, (fneg x)) -> 0.0 + if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) + return DAG.getConstantFP(0.0, DL, VT); + } + + // If 'unsafe math' or reassoc and nsz, fold lots of things. + // TODO: break out portions of the transformations below for which Unsafe is + // considered and which do not require both nsz and reassoc + if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || + (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && + AllowNewConst) { + // fadd (fadd x, c1), c2 -> fadd x, c1 + c2 + if (N1CFP && N0.getOpcode() == ISD::FADD && + isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { + SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags); + return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags); + } + + // We can fold chains of FADD's of the same value into multiplications. + // This transform is not safe in general because we are reducing the number + // of rounding steps. 
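+ // E.g., ((x + x) + x) rounds after each FADD, while (x * 3.0) rounds once; + // that is why the folds below (up to x * 4.0) sit under the unsafe/reassoc + // guard above.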
+ if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { + if (N0.getOpcode() == ISD::FMUL) { + bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); + + // (fadd (fmul x, c), x) -> (fmul x, c+1) + if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), + DAG.getConstantFP(1.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags); + } + + // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) + if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), + DAG.getConstantFP(2.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags); + } + } + + if (N1.getOpcode() == ISD::FMUL) { + bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); + + // (fadd x, (fmul x, c)) -> (fmul x, c+1) + if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), + DAG.getConstantFP(1.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags); + } + + // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) + if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N0.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), + DAG.getConstantFP(2.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags); + } + } + + if (N0.getOpcode() == ISD::FADD) { + bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + // (fadd (fadd x, x), x) -> (fmul x, 3.0) + if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && + (N0.getOperand(0) == N1)) { + return DAG.getNode(ISD::FMUL, DL, VT, + N1, DAG.getConstantFP(3.0, DL, VT), Flags); + } + } + + if (N1.getOpcode() == ISD::FADD) { + bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + // (fadd x, (fadd x, x)) -> (fmul x, 3.0) + if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && + N1.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, DL, VT, + N0, DAG.getConstantFP(3.0, DL, VT), Flags); + } + } + + // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) + if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), + DAG.getConstantFP(4.0, DL, VT), Flags); + } + } + } // enable-unsafe-fp-math + + // FADD -> FMA combines: + if (SDValue Fused = visitFADDForFMACombine(N)) { + AddToWorklist(Fused.getNode()); + return Fused; + } + return SDValue(); +} + +SDValue DAGCombiner::visitFSUB(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true); + ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true); + EVT VT = N->getValueType(0); + SDLoc DL(N); + const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags Flags = N->getFlags(); + + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (fsub c1, c2) -> c1-c2 + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, 
Flags); + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + + // (fsub A, 0) -> A + if (N1CFP && N1CFP->isZero()) { + if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath || + Flags.hasNoSignedZeros()) { + return N0; + } + } + + if (N0 == N1) { + // (fsub x, x) -> 0.0 + if (Options.NoNaNsFPMath || Flags.hasNoNaNs()) + return DAG.getConstantFP(0.0f, DL, VT); + } + + // (fsub -0.0, N1) -> -N1 + // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the + // FSUB does not specify the sign bit of a NaN. Also note that for + // the same reason, the inverse transform is not safe, unless fast math + // flags are in play. + if (N0CFP && N0CFP->isZero()) { + if (N0CFP->isNegative() || + (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { + if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) + return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); + } + } + + if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || + (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && + N1.getOpcode() == ISD::FADD) { + // X - (X + Y) -> -Y + if (N0 == N1->getOperand(0)) + return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags); + // X - (Y + X) -> -Y + if (N0 == N1->getOperand(1)) + return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags); + } + + // fold (fsub A, (fneg B)) -> (fadd A, B) + if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) + return DAG.getNode( + ISD::FADD, DL, VT, N0, + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); + + // FSUB -> FMA combines: + if (SDValue Fused = visitFSUBForFMACombine(N)) { + AddToWorklist(Fused.getNode()); + return Fused; + } + + return SDValue(); +} + +/// Return true if both inputs are at least as cheap in negated form and at +/// least one input is strictly cheaper in negated form. +bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) { + if (char LHSNeg = + TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize)) + if (char RHSNeg = + TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize)) + // Both negated operands are at least as cheap as their counterparts. + // Check to see if at least one is cheaper negated. + if (LHSNeg == 2 || RHSNeg == 2) + return true; + + return false; +} + +SDValue DAGCombiner::visitFMUL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true); + ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true); + EVT VT = N->getValueType(0); + SDLoc DL(N); + const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags Flags = N->getFlags(); + + // fold vector ops + if (VT.isVector()) { + // This just handles C1 * C2 for vectors. Other vector folds are below. 
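+ // (Added note: splat vectors also reach the scalar folds below, since + // isConstOrConstSplatFP matches a splat of a constant.)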
+ if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + } + + // fold (fmul c1, c2) -> c1*c2 + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags); + + // canonicalize constant to RHS + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) + return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + + if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) || + (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) { + // fold (fmul A, 0) -> 0 + if (N1CFP && N1CFP->isZero()) + return N1; + } + + if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) { + // fmul (fmul X, C1), C2 -> fmul X, C1 * C2 + if (isConstantFPBuildVectorOrConstantFP(N1) && + N0.getOpcode() == ISD::FMUL) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + // Avoid an infinite loop by making sure that N00 is not a constant + // (the inner multiply has not been constant folded yet). + if (isConstantFPBuildVectorOrConstantFP(N01) && + !isConstantFPBuildVectorOrConstantFP(N00)) { + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags); + } + } + + // Match a special-case: we convert X * 2.0 into fadd. + // fmul (fadd X, X), C -> fmul X, 2.0 * C + if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() && + N0.getOperand(0) == N0.getOperand(1)) { + const SDValue Two = DAG.getConstantFP(2.0, DL, VT); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags); + } + } + + // fold (fmul X, 2.0) -> (fadd X, X) + if (N1CFP && N1CFP->isExactlyValue(+2.0)) + return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags); + + // fold (fmul X, -1.0) -> (fneg X) + if (N1CFP && N1CFP->isExactlyValue(-1.0)) + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, DL, VT, N0); + + // -N0 * -N1 --> N0 * N1 + if (isCheaperToUseNegatedFPOps(N0, N1)) { + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags); + } + + // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) + // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X) + if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() && + (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) && + TLI.isOperationLegal(ISD::FABS, VT)) { + SDValue Select = N0, X = N1; + if (Select.getOpcode() != ISD::SELECT) + std::swap(Select, X); + + SDValue Cond = Select.getOperand(0); + auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1)); + auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2)); + + if (TrueOpnd && FalseOpnd && + Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X && + isa<ConstantFPSDNode>(Cond.getOperand(1)) && + cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) { + ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); + switch (CC) { + default: break; + case ISD::SETOLT: + case ISD::SETULT: + case ISD::SETOLE: + case ISD::SETULE: + case ISD::SETLT: + case ISD::SETLE: + std::swap(TrueOpnd, FalseOpnd); + LLVM_FALLTHROUGH; + case ISD::SETOGT: + case ISD::SETUGT: + case ISD::SETOGE: + case ISD::SETUGE: + case ISD::SETGT: + case ISD::SETGE: + if (TrueOpnd->isExactlyValue(-1.0) && 
FalseOpnd->isExactlyValue(1.0) && + TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, DL, VT, + DAG.getNode(ISD::FABS, DL, VT, X)); + if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0)) + return DAG.getNode(ISD::FABS, DL, VT, X); + + break; + } + } + } + + // FMUL -> FMA combines: + if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) { + AddToWorklist(Fused.getNode()); + return Fused; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFMA(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + SDLoc DL(N); + const TargetOptions &Options = DAG.getTarget().Options; + + // FMA nodes have flags that propagate to the created nodes. + const SDNodeFlags Flags = N->getFlags(); + bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N); + + // Constant fold FMA. + if (isa<ConstantFPSDNode>(N0) && + isa<ConstantFPSDNode>(N1) && + isa<ConstantFPSDNode>(N2)) { + return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); + } + + // (-N0 * -N1) + N2 --> (N0 * N1) + N2 + if (isCheaperToUseNegatedFPOps(N0, N1)) { + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags); + } + + if (UnsafeFPMath) { + if (N0CFP && N0CFP->isZero()) + return N2; + if (N1CFP && N1CFP->isZero()) + return N2; + } + // TODO: The FMA node should have flags that propagate to these nodes. + if (N0CFP && N0CFP->isExactlyValue(1.0)) + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); + if (N1CFP && N1CFP->isExactlyValue(1.0)) + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); + + // Canonicalize (fma c, x, y) -> (fma x, c, y) + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); + + if (UnsafeFPMath) { + // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) + if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && + isConstantFPBuildVectorOrConstantFP(N1) && + isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1), + Flags), Flags); + } + + // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) + if (N0.getOpcode() == ISD::FMUL && + isConstantFPBuildVectorOrConstantFP(N1) && + isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { + return DAG.getNode(ISD::FMA, DL, VT, + N0.getOperand(0), + DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1), + Flags), + N2); + } + } + + // (fma x, 1, y) -> (fadd x, y) + // (fma x, -1, y) -> (fadd (fneg x), y) + if (N1CFP) { + if (N1CFP->isExactlyValue(1.0)) + // TODO: The FMA node should have flags that propagate to this node. + return DAG.getNode(ISD::FADD, DL, VT, N0, N2); + + if (N1CFP->isExactlyValue(-1.0) && + (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { + SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0); + AddToWorklist(RHSNeg.getNode()); + // TODO: The FMA node should have flags that propagate to this node. 
+ return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg); + } + + // fma (fneg x), K, y -> fma x, -K, y + if (N0.getOpcode() == ISD::FNEG && + (TLI.isOperationLegal(ISD::ConstantFP, VT) || + (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, + ForCodeSize)))) { + return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0), + DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2); + } + } + + if (UnsafeFPMath) { + // (fma x, c, x) -> (fmul x, (c+1)) + if (N1CFP && N0 == N2) { + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getNode(ISD::FADD, DL, VT, N1, + DAG.getConstantFP(1.0, DL, VT), Flags), + Flags); + } + + // (fma x, c, (fneg x)) -> (fmul x, (c-1)) + if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getNode(ISD::FADD, DL, VT, N1, + DAG.getConstantFP(-1.0, DL, VT), Flags), + Flags); + } + } + + return SDValue(); +} + +// Combine multiple FDIVs with the same divisor into multiple FMULs by the +// reciprocal. +// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) +// Notice that this is not always beneficial. One reason is different targets +// may have different costs for FDIV and FMUL, so sometimes the cost of two +// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason +// is that the critical path is increased from "one FDIV" to "one FDIV + one +// FMUL". +SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { + // TODO: Limit this transform based on optsize/minsize - it always creates at + // least 1 extra instruction. But the perf win may be substantial enough + // that only minsize should restrict this. + bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; + const SDNodeFlags Flags = N->getFlags(); + if (!UnsafeMath && !Flags.hasAllowReciprocal()) + return SDValue(); + + // Skip if current node is a reciprocal/fneg-reciprocal. + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true); + if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0))) + return SDValue(); + + // Exit early if the target does not want this transform or if there can't + // possibly be enough uses of the divisor to make the transform worthwhile. + SDValue N1 = N->getOperand(1); + unsigned MinUses = TLI.combineRepeatedFPDivisors(); + + // For splat vectors, scale the number of uses by the splat factor. If we can + // convert the division into a scalar op, that will likely be much faster. + unsigned NumElts = 1; + EVT VT = N->getValueType(0); + if (VT.isVector() && DAG.isSplatValue(N1)) + NumElts = VT.getVectorNumElements(); + + if (!MinUses || (N1->use_size() * NumElts) < MinUses) + return SDValue(); + + // Find all FDIV users of the same divisor. + // Use a set because duplicates may be present in the user list. + SetVector<SDNode *> Users; + for (auto *U : N1->uses()) { + if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) { + // This division is eligible for optimization only if global unsafe math + // is enabled or if this division allows reciprocal formation. + if (UnsafeMath || U->getFlags().hasAllowReciprocal()) + Users.insert(U); + } + } + + // Now that we have the actual number of divisor uses, make sure it meets + // the minimum threshold specified by the target.
+ if ((Users.size() * NumElts) < MinUses) + return SDValue(); + + SDLoc DL(N); + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); + SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags); + + // Dividend / Divisor -> Dividend * Reciprocal + for (auto *U : Users) { + SDValue Dividend = U->getOperand(0); + if (Dividend != FPOne) { + SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, + Reciprocal, Flags); + CombineTo(U, NewNode); + } else if (U != Reciprocal.getNode()) { + // In the absence of fast-math-flags, this user node is always the + // same node as Reciprocal, but with FMF they may be different nodes. + CombineTo(U, Reciprocal); + } + } + return SDValue(N, 0); // N was replaced. +} + +SDValue DAGCombiner::visitFDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + SDLoc DL(N); + const TargetOptions &Options = DAG.getTarget().Options; + SDNodeFlags Flags = N->getFlags(); + + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (fdiv c1, c2) -> c1/c2 + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags); + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + + if (SDValue V = combineRepeatedFPDivisors(N)) + return V; + + if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) { + // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. + if (N1CFP) { + // Compute the reciprocal 1.0 / c2. + const APFloat &N1APF = N1CFP->getValueAPF(); + APFloat Recip(N1APF.getSemantics(), 1); // 1.0 + APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); + // Only do the transform if the reciprocal is a legal fp immediate that + // isn't too nasty (eg NaN, denormal, ...). + if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. + // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) || + TLI.isOperationLegal(ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT, ForCodeSize))) + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getConstantFP(Recip, DL, VT), Flags); + } + + // If this FDIV is part of a reciprocal square root, it may be folded + // into a target-specific square root estimate instruction. + if (N1.getOpcode() == ISD::FSQRT) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); + } else if (N1.getOpcode() == ISD::FP_EXTEND && + N1.getOperand(0).getOpcode() == ISD::FSQRT) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), + Flags)) { + RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); + } + } else if (N1.getOpcode() == ISD::FP_ROUND && + N1.getOperand(0).getOpcode() == ISD::FSQRT) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), + Flags)) { + RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); + } + } else if (N1.getOpcode() == ISD::FMUL) { + // Look through an FMUL. Even though this won't remove the FDIV directly, + // it's still worthwhile to get rid of the FSQRT if possible. 
+ SDValue SqrtOp; + SDValue OtherOp; + if (N1.getOperand(0).getOpcode() == ISD::FSQRT) { + SqrtOp = N1.getOperand(0); + OtherOp = N1.getOperand(1); + } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) { + SqrtOp = N1.getOperand(1); + OtherOp = N1.getOperand(0); + } + if (SqrtOp.getNode()) { + // We found an FSQRT, so try to make this fold: + // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) + if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { + RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags); + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); + } + } + } + + // Fold into a reciprocal estimate and multiply instead of a real divide. + if (SDValue RV = BuildDivEstimate(N0, N1, Flags)) + return RV; + } + + // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) + if (isCheaperToUseNegatedFPOps(N0, N1)) + return DAG.getNode( + ISD::FDIV, SDLoc(N), VT, + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); + + return SDValue(); +} + +SDValue DAGCombiner::visitFREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + + // fold (frem c1, c2) -> fmod(c1,c2) + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags()); + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + + return SDValue(); +} + +SDValue DAGCombiner::visitFSQRT(SDNode *N) { + SDNodeFlags Flags = N->getFlags(); + if (!DAG.getTarget().Options.UnsafeFPMath && + !Flags.hasApproximateFuncs()) + return SDValue(); + + SDValue N0 = N->getOperand(0); + if (TLI.isFsqrtCheap(N0, DAG)) + return SDValue(); + + // FSQRT nodes have flags that propagate to the created nodes. + return buildSqrtEstimate(N0, Flags); +} + +/// copysign(x, fp_extend(y)) -> copysign(x, y) +/// copysign(x, fp_round(y)) -> copysign(x, y) +static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { + SDValue N1 = N->getOperand(1); + if ((N1.getOpcode() == ISD::FP_EXTEND || + N1.getOpcode() == ISD::FP_ROUND)) { + // Do not optimize out type conversion of f128 type yet. + // For some targets like x86_64, configuration is changed to keep one f128 + // value in one SSE register, but instruction selection cannot handle + // FCOPYSIGN on SSE registers yet.
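+ // I.e., the check below allows the fold when the conversion's source type + // is not f128, or when the conversion is a no-op (same type); otherwise it + // keeps the conversion.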
+ EVT N1VT = N1->getValueType(0); + EVT N1Op0VT = N1->getOperand(0).getValueType(); + return (N1VT == N1Op0VT || N1Op0VT != MVT::f128); + } + return false; +} + +SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0); + bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); + EVT VT = N->getValueType(0); + + if (N0CFP && N1CFP) // Constant fold + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); + + if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) { + const APFloat &V = N1C->getValueAPF(); + // copysign(x, c1) -> fabs(x) iff ispos(c1) + // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) + if (!V.isNegative()) { + if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); + } else { + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, + DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0)); + } + } + + // copysign(fabs(x), y) -> copysign(x, y) + // copysign(fneg(x), y) -> copysign(x, y) + // copysign(copysign(x,z), y) -> copysign(x, y) + if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || + N0.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1); + + // copysign(x, abs(y)) -> abs(x) + if (N1.getOpcode() == ISD::FABS) + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); + + // copysign(x, copysign(y,z)) -> copysign(x, z) + if (N1.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1)); + + // copysign(x, fp_extend(y)) -> copysign(x, y) + // copysign(x, fp_round(y)) -> copysign(x, y) + if (CanCombineFCOPYSIGN_EXTEND_ROUND(N)) + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); + + return SDValue(); +} + +SDValue DAGCombiner::visitFPOW(SDNode *N) { + ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1)); + if (!ExponentC) + return SDValue(); + + // Try to convert x ** (1/3) into cube root. + // TODO: Handle the various flavors of long double. + // TODO: Since we're approximating, we don't need an exact 1/3 exponent. + // Some range near 1/3 should be fine. + EVT VT = N->getValueType(0); + if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) || + (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) { + // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0. + // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf. + // pow(-val, 1/3) = nan; cbrt(-val) = -num. + // For regular numbers, rounding may cause the results to differ. + // Therefore, we require { nsz ninf nnan afn } for this transform. + // TODO: We could select out the special cases if we don't have nsz/ninf. + SDNodeFlags Flags = N->getFlags(); + if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() || + !Flags.hasApproximateFuncs()) + return SDValue(); + + // Do not create a cbrt() libcall if the target does not have it, and do not + // turn a pow that has lowering support into a cbrt() libcall. + if (!DAG.getLibInfo().has(LibFunc_cbrt) || + (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) && + DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT))) + return SDValue(); + + return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags); + } + + // Try to convert x ** (1/4) and x ** (3/4) into square roots. + // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case. 
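+ // The rewrites below rely on x**0.25 == (x**0.5)**0.5 and + // x**0.75 == x**0.5 * (x**0.5)**0.5, which are exact as real-number + // identities; the flags checked below account for rounding and the special + // values noted above.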
+ // TODO: This could be extended (using a target hook) to handle smaller + // power-of-2 fractional exponents. + bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25); + bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75); + if (ExponentIs025 || ExponentIs075) { + // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0. + // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN. + // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0. + // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN. + // For regular numbers, rounding may cause the results to differ. + // Therefore, we require { nsz ninf afn } for this transform. + // TODO: We could select out the special cases if we don't have nsz/ninf. + SDNodeFlags Flags = N->getFlags(); + + // We only need no signed zeros for the 0.25 case. + if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() || + !Flags.hasApproximateFuncs()) + return SDValue(); + + // Don't double the number of libcalls. We are trying to inline fast code. + if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT)) + return SDValue(); + + // Assume that libcalls are the smallest code. + // TODO: This restriction should probably be lifted for vectors. + if (DAG.getMachineFunction().getFunction().hasOptSize()) + return SDValue(); + + // pow(X, 0.25) --> sqrt(sqrt(X)) + SDLoc DL(N); + SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags); + SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags); + if (ExponentIs025) + return SqrtSqrt; + // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X)) + return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags); + } + + return SDValue(); +} + +static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, + const TargetLowering &TLI) { + // This optimization is guarded by a function attribute because it may produce + // unexpected results. I.e., programs may be relying on the platform-specific + // undefined behavior when the float-to-int conversion overflows. + const Function &F = DAG.getMachineFunction().getFunction(); + Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow"); + if (StrictOverflow.getValueAsString().equals("false")) + return SDValue(); + + // We only do this if the target has legal ftrunc. Otherwise, we'd likely be + // replacing casts with a libcall. We also must be allowed to ignore -0.0 + // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer + // conversions would return +0.0. + // FIXME: We should be able to use node-level FMF here. + // TODO: If strict math, should we use FABS (+ range check for signed cast)?
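+ // E.g., for X = -0.5: ftrunc(-0.5) is -0.0, but sitofp(fptosi(-0.5)) is + // +0.0, which is why the nsz check below is required (illustrative).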
+ EVT VT = N->getValueType(0); + if (!TLI.isOperationLegal(ISD::FTRUNC, VT) || + !DAG.getTarget().Options.NoSignedZerosFPMath) + return SDValue(); + + // fptosi/fptoui round towards zero, so converting from FP to integer and + // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X + SDValue N0 = N->getOperand(0); + if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT && + N0.getOperand(0).getValueType() == VT) + return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0)); + + if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT && + N0.getOperand(0).getValueType() == VT) + return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0)); + + return SDValue(); +} + +SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); + + // [us]itofp(undef) = 0, because the result value is bounded. + if (N0.isUndef()) + return DAG.getConstantFP(0.0, SDLoc(N), VT); + + // fold (sint_to_fp c1) -> c1fp + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + // ...but only if the target supports immediate floating-point values + (!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) + return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); + + // If the input is a legal type, and SINT_TO_FP is not legal on this target, + // but UINT_TO_FP is legal on this target, try to convert. + if (!hasOperation(ISD::SINT_TO_FP, OpVT) && + hasOperation(ISD::UINT_TO_FP, OpVT)) { + // If the sign bit is known to be zero, we can change this to UINT_TO_FP. + if (DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); + } + + // The next optimizations are desirable only if SELECT_CC can be lowered. + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { + // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc) + if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && + !VT.isVector() && + (!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { + SDLoc DL(N); + SDValue Ops[] = + { N0.getOperand(0), N0.getOperand(1), + DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), + N0.getOperand(2) }; + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); + } + + // fold (sint_to_fp (zext (setcc x, y, cc))) -> + // (select_cc x, y, 1.0, 0.0, cc) + if (N0.getOpcode() == ISD::ZERO_EXTEND && + N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() && + (!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { + SDLoc DL(N); + SDValue Ops[] = + { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), + DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), + N0.getOperand(0).getOperand(2) }; + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); + } + } + + if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI)) + return FTrunc; + + return SDValue(); +} + +SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); + + // [us]itofp(undef) = 0, because the result value is bounded.
+ if (N0.isUndef())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
+ // fold (uint_to_fp c1) -> c1fp
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ // ...but only if the target supports immediate floating-point values
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
+
+ // If the input is a legal type, and UINT_TO_FP is not legal on this target,
+ // but SINT_TO_FP is legal on this target, try to convert.
+ if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
+ hasOperation(ISD::SINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
+ }
+
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
+ // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
+ }
+ }
+
+ if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
+ return FTrunc;
+
+ return SDValue();
+}
+
+// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
+static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
+ return SDValue();
+
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
+ bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ // We can safely assume the conversion won't overflow the output range,
+ // because (for example) (uint8_t)18293.f is undefined behavior.
+
+ // Since we can assume the conversion won't overflow, our decision as to
+ // whether the input will fit in the float should depend on the minimum
+ // of the input range and output range.
+
+ // This means this is also safe for a signed input and unsigned output, since
+ // a negative input would lead to undefined behavior.
+ unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
+ unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
+ unsigned ActualSize = std::min(InputSize, OutputSize);
+ const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
+
+ // We can only fold away the float conversion if the input range can be
+ // represented exactly in the float range.
+ if (APFloat::semanticsPrecision(sem) >= ActualSize) {
+ if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
+ unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
+ }
+ if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
+ return DAG.getBitcast(VT, Src);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_sint undef) -> undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ // fold (fp_to_sint c1fp) -> c1
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
+
+ return FoldIntToFPToInt(N, DAG);
+}
+
+SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_uint undef) -> undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ // fold (fp_to_uint c1fp) -> c1
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
+
+ return FoldIntToFPToInt(N, DAG);
+}
+
+SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_round c1fp) -> c1fp
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
+
+ // fold (fp_round (fp_extend x)) -> x
+ if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+ return N0.getOperand(0);
+
+ // fold (fp_round (fp_round x)) -> (fp_round x)
+ if (N0.getOpcode() == ISD::FP_ROUND) {
+ const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
+ const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
+
+ // Skip this folding if it results in an fp_round from f80 to f16.
+ //
+ // f80 to f16 always generates an expensive (and as yet, unimplemented)
+ // libcall to __truncxfhf2 instead of selecting native f16 conversion
+ // instructions from f32 or f64. Moreover, the first (value-preserving)
+ // fp_round from f80 to either f32 or f64 may become a NOP on platforms like
+ // x86.
+ if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
+ return SDValue();
+
+ // If the first fp_round isn't a value-preserving truncation, it might
+ // introduce a tie in the second fp_round, that wouldn't occur in the
+ // single-step fp_round we want to fold to.
+ // In other words, double rounding isn't the same as rounding.
+ // Also, this is a value-preserving truncation iff both fp_round's are.
+ if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
+ }
+ }
+
+ // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+ SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
+ N0.getOperand(0), N1);
+ AddToWorklist(Tmp.getNode());
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
+ Tmp, N0.getOperand(1));
+ }
+
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // If this is fp_round(fpextend), don't fold it; allow ourselves to be folded.
+ if (N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::FP_ROUND)
+ return SDValue();
+
+ // fold (fp_extend c1fp) -> c1fp
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
+
+ // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
+ if (N0.getOpcode() == ISD::FP16_TO_FP &&
+ TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
+
+ // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
+ // value of X.
+ if (N0.getOpcode() == ISD::FP_ROUND
+ && N0.getConstantOperandVal(1) == 1) {
+ SDValue In = N0.getOperand(0);
+ if (In.getValueType() == VT) return In;
+ if (VT.bitsLT(In.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
+ In, N0.getOperand(1));
+ return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
+ }
+
+ // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
+ N0.getValueType(), ExtLoad,
+ DAG.getIntPtrConstant(1, SDLoc(N0))),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fceil c1) -> fceil(c1)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ftrunc c1) -> ftrunc(c1)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
+
+ // fold ftrunc (known rounded int x) -> x
+ // ftrunc is part of the fptosi/fptoui expansion on some targets, so this is
+ // likely to be generated to extract an integer from a rounded floating-point
+ // value.
+ switch (N0.getOpcode()) {
+ default: break;
+ case ISD::FRINT:
+ case ISD::FTRUNC:
+ case ISD::FNEARBYINT:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ return N0;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ffloor c1) -> ffloor(c1)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
+// FIXME: FNEG and FABS have a lot in common; refactor.
+SDValue DAGCombiner::visitFNEG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // Constant fold FNEG.
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
+
+ if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize))
+ return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+
+ // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
+ // constant pool values.
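+ // E.g., (fneg (bitcast i32 X to f32)) becomes
+ // (bitcast (xor X, 0x80000000) to f32), flipping only the IEEE sign bit.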
+ if (!TLI.isFNegFree(VT) && + N0.getOpcode() == ISD::BITCAST && + N0.getNode()->hasOneUse()) { + SDValue Int = N0.getOperand(0); + EVT IntVT = Int.getValueType(); + if (IntVT.isInteger() && !IntVT.isVector()) { + APInt SignMask; + if (N0.getValueType().isVector()) { + // For a vector, get a mask such as 0x80... per scalar element + // and splat it. + SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits()); + SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); + } else { + // For a scalar, just generate 0x80... + SignMask = APInt::getSignMask(IntVT.getSizeInBits()); + } + SDLoc DL0(N0); + Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, + DAG.getConstant(SignMask, DL0, IntVT)); + AddToWorklist(Int.getNode()); + return DAG.getBitcast(VT, Int); + } + } + + // (fneg (fmul c, x)) -> (fmul -c, x) + if (N0.getOpcode() == ISD::FMUL && + (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) { + ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); + if (CFP1) { + APFloat CVal = CFP1->getValueAPF(); + CVal.changeSign(); + if (Level >= AfterLegalizeDAG && + (TLI.isFPImmLegal(CVal, VT, ForCodeSize) || + TLI.isOperationLegal(ISD::ConstantFP, VT))) + return DAG.getNode( + ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)), + N0->getFlags()); + } + } + + return SDValue(); +} + +static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N, + APFloat (*Op)(const APFloat &, const APFloat &)) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); + const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); + + if (N0CFP && N1CFP) { + const APFloat &C0 = N0CFP->getValueAPF(); + const APFloat &C1 = N1CFP->getValueAPF(); + return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT); + } + + // Canonicalize to constant on RHS. + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFMINNUM(SDNode *N) { + return visitFMinMax(DAG, N, minnum); +} + +SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { + return visitFMinMax(DAG, N, maxnum); +} + +SDValue DAGCombiner::visitFMINIMUM(SDNode *N) { + return visitFMinMax(DAG, N, minimum); +} + +SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) { + return visitFMinMax(DAG, N, maximum); +} + +SDValue DAGCombiner::visitFABS(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (fabs c1) -> fabs(c1) + if (isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); + + // fold (fabs (fabs x)) -> (fabs x) + if (N0.getOpcode() == ISD::FABS) + return N->getOperand(0); + + // fold (fabs (fneg x)) -> (fabs x) + // fold (fabs (fcopysign x, y)) -> (fabs x) + if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); + + // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads. + if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) { + SDValue Int = N0.getOperand(0); + EVT IntVT = Int.getValueType(); + if (IntVT.isInteger() && !IntVT.isVector()) { + APInt SignMask; + if (N0.getValueType().isVector()) { + // For a vector, get a mask such as 0x7f... per scalar element + // and splat it. 
+ SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
+ SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+ } else {
+ // For a scalar, just generate 0x7f...
+ SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
+ }
+ SDLoc DL(N0);
+ Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
+ DAG.getConstant(SignMask, DL, IntVT));
+ AddToWorklist(Int.getNode());
+ return DAG.getBitcast(N->getValueType(0), Int);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBRCOND(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+ // on the target.
+ if (N1.getOpcode() == ISD::SETCC &&
+ TLI.isOperationLegalOrCustom(ISD::BR_CC,
+ N1.getOperand(0).getValueType())) {
+ return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
+ Chain, N1.getOperand(2),
+ N1.getOperand(0), N1.getOperand(1), N2);
+ }
+
+ if (N1.hasOneUse()) {
+ if (SDValue NewN1 = rebuildSetCC(N1))
+ return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::rebuildSetCC(SDValue N) {
+ if (N.getOpcode() == ISD::SRL ||
+ (N.getOpcode() == ISD::TRUNCATE &&
+ (N.getOperand(0).hasOneUse() &&
+ N.getOperand(0).getOpcode() == ISD::SRL))) {
+ // Look past the truncate.
+ if (N.getOpcode() == ISD::TRUNCATE)
+ N = N.getOperand(0);
+
+ // Match this pattern so that we can generate simpler code:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // This applies only when the AND constant value has one bit set and the
+ // SRL constant is equal to the log2 of the AND constant. The back-end is
+ // smart enough to convert the result into a TEST/JMP sequence.
+ SDValue Op0 = N.getOperand(0);
+ SDValue Op1 = N.getOperand(1);
+
+ if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
+ SDValue AndOp1 = Op0.getOperand(1);
+
+ if (AndOp1.getOpcode() == ISD::Constant) {
+ const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
+
+ if (AndConst.isPowerOf2() &&
+ cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
+ SDLoc DL(N);
+ return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, DL, Op0.getValueType()),
+ ISD::SETNE);
+ }
+ }
+ }
+ }
+
+ // Transform br(xor(x, y)) -> br(x != y)
+ // Transform br(xor(xor(x,y), 1)) -> br (x == y)
+ if (N.getOpcode() == ISD::XOR) {
+ // Because we may call this on a speculatively constructed
+ // SimplifiedSetCC Node, we need to simplify this node first.
+ // Ideally this should be folded into SimplifySetCC and not
+ // here. For now, grab a handle to N so we don't lose it from
+ // replacements internal to the visit.
+ HandleSDNode XORHandle(N);
+ while (N.getOpcode() == ISD::XOR) {
+ SDValue Tmp = visitXOR(N.getNode());
+ // No simplification done.
+ if (!Tmp.getNode())
+ break;
+ // Returning N is a form of in-visit replacement that may have
+ // invalidated N. Grab the value from the handle.
+ if (Tmp.getNode() == N.getNode()) + N = XORHandle.getValue(); + else // Node simplified. Try simplifying again. + N = Tmp; + } + + if (N.getOpcode() != ISD::XOR) + return N; + + SDNode *TheXor = N.getNode(); + + SDValue Op0 = TheXor->getOperand(0); + SDValue Op1 = TheXor->getOperand(1); + + if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { + bool Equal = false; + if (isOneConstant(Op0) && Op0.hasOneUse() && + Op0.getOpcode() == ISD::XOR) { + TheXor = Op0.getNode(); + Equal = true; + } + + EVT SetCCVT = N.getValueType(); + if (LegalTypes) + SetCCVT = getSetCCResultType(SetCCVT); + // Replace the uses of XOR with SETCC + return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1, + Equal ? ISD::SETEQ : ISD::SETNE); + } + } + + return SDValue(); +} + +// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. +// +SDValue DAGCombiner::visitBR_CC(SDNode *N) { + CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); + SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); + + // If N is a constant we could fold this into a fallthrough or unconditional + // branch. However that doesn't happen very often in normal code, because + // Instcombine/SimplifyCFG should have handled the available opportunities. + // If we did this folding here, it would be necessary to update the + // MachineBasicBlock CFG, which is awkward. + + // Use SimplifySetCC to simplify SETCC's. + SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()), + CondLHS, CondRHS, CC->get(), SDLoc(N), + false); + if (Simp.getNode()) AddToWorklist(Simp.getNode()); + + // fold to a simpler setcc + if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, + N->getOperand(0), Simp.getOperand(2), + Simp.getOperand(0), Simp.getOperand(1), + N->getOperand(4)); + + return SDValue(); +} + +/// Return true if 'Use' is a load or a store that uses N as its base pointer +/// and that N may be folded in the load / store addressing mode. +static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, + SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT; + unsigned AS; + + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = LD->getMemoryVT(); + AS = LD->getAddressSpace(); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + if (ST->isIndexed() || ST->getBasePtr().getNode() != N) + return false; + VT = ST->getMemoryVT(); + AS = ST->getAddressSpace(); + } else + return false; + + TargetLowering::AddrMode AM; + if (N->getOpcode() == ISD::ADD) { + AM.HasBaseReg = true; + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + // [reg +/- imm] + AM.BaseOffs = Offset->getSExtValue(); + else + // [reg +/- reg] + AM.Scale = 1; + } else if (N->getOpcode() == ISD::SUB) { + AM.HasBaseReg = true; + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + // [reg +/- imm] + AM.BaseOffs = -Offset->getSExtValue(); + else + // [reg +/- reg] + AM.Scale = 1; + } else + return false; + + return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, + VT.getTypeForEVT(*DAG.getContext()), AS); +} + +/// Try turning a load/store into a pre-indexed load/store when the base +/// pointer is an add or subtract and it has other uses besides the load/store. +/// After the transformation, the new indexed load/store has effectively folded +/// the add/subtract in and all of its other uses are redirected to the +/// new load/store. 
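+///
+/// For example, on a target with pre-indexed addressing (such as ARM), a
+/// sequence like
+///   tmp = add base, #4;  val = load [tmp];  ...other uses of tmp...
+/// can become a single pre-indexed load, ldr val, [base, #4]!, which writes
+/// base + 4 back into the base register; the other uses of tmp are then
+/// redirected to the written-back value.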
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
+ // out. There is no reason to make this a preinc/predec.
+ if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
+ Ptr.getNode()->hasOneUse())
+ return false;
+
+ // Ask the target to do addressing mode selection.
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+ return false;
+
+ // Backends without true r+i pre-indexed forms may need to pass a
+ // constant base with a variable offset so that constant coercion
+ // will work with the patterns in canonical form.
+ bool Swapped = false;
+ if (isa<ConstantSDNode>(BasePtr)) {
+ std::swap(BasePtr, Offset);
+ Swapped = true;
+ }
+
+ // Don't create an indexed load / store with zero offset.
+ if (isNullConstant(Offset))
+ return false;
+
+ // Try turning it into a pre-indexed load / store except when:
+ // 1) The new base ptr is a frame index.
+ // 2) If N is a store and the new base ptr is either the same as or is a
+ // predecessor of the value being stored.
+ // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
+ // that would create a cycle.
+ // 4) All uses are load / store ops that use it as old base ptr.
+
+ // Check #1. Preinc'ing a frame index would require copying the stack pointer
+ // (plus the implicit offset) to a register to preinc anyway.
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ return false;
+
+ // Check #2.
+ if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+
+ // Would require a copy.
+ if (Val == BasePtr)
+ return false;
+
+ // Would create a cycle.
+ if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
+ return false;
+ }
+
+ // Caches for hasPredecessorHelper.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Worklist.push_back(N);
+
+ // If the offset is a constant, there may be other adds of constants that
+ // can be folded with this one. We should do this to avoid having to keep
+ // a copy of the original base pointer.
+ SmallVector<SDNode *, 16> OtherUses;
+ if (isa<ConstantSDNode>(Offset))
+ for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
+ UE = BasePtr.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ // Skip the use that is Ptr and uses of other results from BasePtr's
+ // node (important for nodes that return multiple results).
+ if (Use.getUser() == Ptr.getNode() || Use != BasePtr) + continue; + + if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist)) + continue; + + if (Use.getUser()->getOpcode() != ISD::ADD && + Use.getUser()->getOpcode() != ISD::SUB) { + OtherUses.clear(); + break; + } + + SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1); + if (!isa<ConstantSDNode>(Op1)) { + OtherUses.clear(); + break; + } + + // FIXME: In some cases, we can be smarter about this. + if (Op1.getValueType() != Offset.getValueType()) { + OtherUses.clear(); + break; + } + + OtherUses.push_back(Use.getUser()); + } + + if (Swapped) + std::swap(BasePtr, Offset); + + // Now check for #3 and #4. + bool RealUse = false; + + for (SDNode *Use : Ptr.getNode()->uses()) { + if (Use == N) + continue; + if (SDNode::hasPredecessorHelper(Use, Visited, Worklist)) + return false; + + // If Ptr may be folded in addressing mode of other use, then it's + // not profitable to do this transformation. + if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI)) + RealUse = true; + } + + if (!RealUse) + return false; + + SDValue Result; + if (isLoad) + Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), + BasePtr, Offset, AM); + else + Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N), + BasePtr, Offset, AM); + ++PreIndexedNodes; + ++NodesCombined; + LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: "; + Result.getNode()->dump(&DAG); dbgs() << '\n'); + WorklistRemover DeadNodes(*this); + if (isLoad) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); + } + + // Finally, since the node is now dead, remove it from the graph. + deleteAndRecombine(N); + + if (Swapped) + std::swap(BasePtr, Offset); + + // Replace other uses of BasePtr that can be updated to use Ptr + for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) { + unsigned OffsetIdx = 1; + if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode()) + OffsetIdx = 0; + assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() == + BasePtr.getNode() && "Expected BasePtr operand"); + + // We need to replace ptr0 in the following expression: + // x0 * offset0 + y0 * ptr0 = t0 + // knowing that + // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) + // + // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the + // indexed load/store and the expression that needs to be re-written. + // + // Therefore, we have: + // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1 + + ConstantSDNode *CN = + cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); + int X0, X1, Y0, Y1; + const APInt &Offset0 = CN->getAPIntValue(); + APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue(); + + X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; + Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1; + X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1; + Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1; + + unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD; + + APInt CNV = Offset0; + if (X0 < 0) CNV = -CNV; + if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1; + else CNV = CNV - Offset1; + + SDLoc DL(OtherUses[i]); + + // We can now generate the new expression. + SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0)); + SDValue NewOp2 = Result.getValue(isLoad ? 
1 : 0);
+
+ SDValue NewUse = DAG.getNode(Opcode,
+ DL,
+ OtherUses[i]->getValueType(0), NewOp1, NewOp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
+ deleteAndRecombine(OtherUses[i]);
+ }
+
+ // Replace the uses of Ptr with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
+ deleteAndRecombine(Ptr.getNode());
+ AddToWorklist(Result.getNode());
+
+ return true;
+}
+
+/// Try to combine a load/store with an add/sub of the base pointer node into a
+/// post-indexed load/store. The transformation effectively folds the
+/// add/subtract into the new indexed load/store, and all of its uses are
+/// redirected to the new load/store.
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ if (Ptr.getNode()->hasOneUse())
+ return false;
+
+ for (SDNode *Op : Ptr.getNode()->uses()) {
+ if (Op == N ||
+ (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+ continue;
+
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+ // Don't create an indexed load / store with zero offset.
+ if (isNullConstant(Offset))
+ continue;
+
+ // Try turning it into a post-indexed load / store except when
+ // 1) All uses are load / store ops that use it as base ptr (and
+ // it may be folded as addressing mode).
+ // 2) Op must be independent of N, i.e. Op is neither a predecessor
+ // nor a successor of N. Otherwise, if Op is folded that would
+ // create a cycle.
+
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ continue;
+
+ // Check for #1.
+ bool TryNext = false;
+ for (SDNode *Use : BasePtr.getNode()->uses()) {
+ if (Use == Ptr.getNode())
+ continue;
+
+ // If all the uses are load / store addresses, then don't do the
+ // transformation.
+ if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+ bool RealUse = false;
+ for (SDNode *UseUse : Use->uses()) {
+ if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
+ RealUse = true;
+ }
+
+ if (!RealUse) {
+ TryNext = true;
+ break;
+ }
+ }
+ }
+
+ if (TryNext)
+ continue;
+
+ // Check for #2.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 8> Worklist;
+ // Ptr is predecessor to both N and Op.
+ Visited.insert(Ptr.getNode());
+ Worklist.push_back(N);
+ Worklist.push_back(Op);
+ if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
+ !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
+ SDValue Result = isLoad
+ ? 
DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), + BasePtr, Offset, AM) + : DAG.getIndexedStore(SDValue(N,0), SDLoc(N), + BasePtr, Offset, AM); + ++PostIndexedNodes; + ++NodesCombined; + LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); + dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); + dbgs() << '\n'); + WorklistRemover DeadNodes(*this); + if (isLoad) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); + } + + // Finally, since the node is now dead, remove it from the graph. + deleteAndRecombine(N); + + // Replace the uses of Use with uses of the updated base value. + DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), + Result.getValue(isLoad ? 1 : 0)); + deleteAndRecombine(Op); + return true; + } + } + } + + return false; +} + +/// Return the base-pointer arithmetic from an indexed \p LD. +SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { + ISD::MemIndexedMode AM = LD->getAddressingMode(); + assert(AM != ISD::UNINDEXED); + SDValue BP = LD->getOperand(1); + SDValue Inc = LD->getOperand(2); + + // Some backends use TargetConstants for load offsets, but don't expect + // TargetConstants in general ADD nodes. We can convert these constants into + // regular Constants (if the constant is not opaque). + assert((Inc.getOpcode() != ISD::TargetConstant || + !cast<ConstantSDNode>(Inc)->isOpaque()) && + "Cannot split out indexing using opaque target constants"); + if (Inc.getOpcode() == ISD::TargetConstant) { + ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc); + Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc), + ConstInc->getValueType(0)); + } + + unsigned Opc = + (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB); + return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); +} + +static inline int numVectorEltsOrZero(EVT T) { + return T.isVector() ? T.getVectorNumElements() : 0; +} + +bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) { + Val = ST->getValue(); + EVT STType = Val.getValueType(); + EVT STMemType = ST->getMemoryVT(); + if (STType == STMemType) + return true; + if (isTypeLegal(STMemType)) + return false; // fail. + if (STType.isFloatingPoint() && STMemType.isFloatingPoint() && + TLI.isOperationLegal(ISD::FTRUNC, STMemType)) { + Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val); + return true; + } + if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) && + STType.isInteger() && STMemType.isInteger()) { + Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val); + return true; + } + if (STType.getSizeInBits() == STMemType.getSizeInBits()) { + Val = DAG.getBitcast(STMemType, Val); + return true; + } + return false; // fail. 
+}
+
+bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
+ EVT LDMemType = LD->getMemoryVT();
+ EVT LDType = LD->getValueType(0);
+ assert(Val.getValueType() == LDMemType &&
+ "Attempting to extend value of non-matching type");
+ if (LDType == LDMemType)
+ return true;
+ if (LDMemType.isInteger() && LDType.isInteger()) {
+ switch (LD->getExtensionType()) {
+ case ISD::NON_EXTLOAD:
+ Val = DAG.getBitcast(LDType, Val);
+ return true;
+ case ISD::EXTLOAD:
+ Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ case ISD::SEXTLOAD:
+ Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ case ISD::ZEXTLOAD:
+ Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ }
+ }
+ return false;
+}
+
+SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
+ if (OptLevel == CodeGenOpt::None || !LD->isSimple())
+ return SDValue();
+ SDValue Chain = LD->getOperand(0);
+ StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
+ // TODO: Relax this restriction for unordered atomics (see D66309)
+ if (!ST || !ST->isSimple())
+ return SDValue();
+
+ EVT LDType = LD->getValueType(0);
+ EVT LDMemType = LD->getMemoryVT();
+ EVT STMemType = ST->getMemoryVT();
+ EVT STType = ST->getValue().getValueType();
+
+ BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
+ BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
+ int64_t Offset;
+ if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
+ return SDValue();
+
+ // Normalize for endianness. After this, Offset=0 denotes that the least
+ // significant bit in the loaded value maps to the least significant bit in
+ // the stored value. With Offset=n (for n > 0) the loaded value starts at the
+ // nth least significant byte of the stored value.
+ if (DAG.getDataLayout().isBigEndian())
+ Offset = (STMemType.getStoreSizeInBits() -
+ LDMemType.getStoreSizeInBits()) / 8 - Offset;
+
+ // Check that the stored value covers all bits that are loaded.
+ bool STCoversLD =
+ (Offset >= 0) &&
+ (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
+
+ auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
+ if (LD->isIndexed()) {
+ bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
+ LD->getAddressingMode() == ISD::POST_DEC);
+ unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
+ SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
+ LD->getOperand(1), LD->getOperand(2));
+ SDValue Ops[] = {Val, Idx, Chain};
+ return CombineTo(LD, Ops, 3);
+ }
+ return CombineTo(LD, Val, Chain);
+ };
+
+ if (!STCoversLD)
+ return SDValue();
+
+ // Memory as copy space (potentially masked).
+ if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
+ // Simple case: Direct non-truncating forwarding
+ if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
+ return ReplaceLd(LD, ST->getValue(), Chain);
+ // Can we model the truncate and extension with an and mask?
+ if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
+ !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
+ // Mask to size of LDMemType
+ auto Mask =
+ DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
+ STMemType.getSizeInBits()),
+ SDLoc(ST), STType);
+ auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
+ return ReplaceLd(LD, Val, Chain);
+ }
+ }
+
+ // TODO: Deal with nonzero offset.
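+ // In the Offset == 0 case handled below, e.g. a (store i32 X, p) followed
+ // by a narrower (load i16 p) can be served directly from X: truncate X to
+ // i16 and, for an extending load, re-extend it, avoiding the round-trip
+ // through memory.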
+ if (LD->getBasePtr().isUndef() || Offset != 0)
+ return SDValue();
+ // Model necessary truncations / extensions.
+ SDValue Val;
+ // Truncate the value to the stored memory size.
+ do {
+ if (!getTruncatedStoreValue(ST, Val))
+ continue;
+ if (!isTypeLegal(LDMemType))
+ continue;
+ if (STMemType != LDMemType) {
+ // TODO: Support vectors? This requires extract_subvector/bitcast.
+ if (!STMemType.isVector() && !LDMemType.isVector() &&
+ STMemType.isInteger() && LDMemType.isInteger())
+ Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
+ else
+ continue;
+ }
+ if (!extendLoadedValueToExtension(LD, Val))
+ continue;
+ return ReplaceLd(LD, Val, Chain);
+ } while (false);
+
+ // On failure, clean up dead nodes we may have created.
+ if (Val->use_empty())
+ deleteAndRecombine(Val.getNode());
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitLOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+
+ // If load is not volatile and there are no uses of the loaded value (and
+ // the updated indexed value in case of indexed loads), change uses of the
+ // chain value into uses of the chain input (i.e. delete the dead load).
+ // TODO: Allow this for unordered atomics (see D66309)
+ if (LD->isSimple()) {
+ if (N->getValueType(1) == MVT::Other) {
+ // Unindexed loads.
+ if (!N->hasAnyUseOfValue(0)) {
+ // It's not safe to use the two-value CombineTo variant here, e.g.
+ // v1, chain2 = load chain1, loc
+ // v2, chain3 = load chain2, loc
+ // v3 = add v2, c
+ // Now we replace use of chain2 with chain1. This makes the second load
+ // isomorphic to the one we are deleting, and thus makes this load live.
+ LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
+ dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
+ dbgs() << "\n");
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+ AddUsersToWorklist(Chain.getNode());
+ if (N->use_empty())
+ deleteAndRecombine(N);
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ } else {
+ // Indexed loads.
+ assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+
+ // If this load has an opaque TargetConstant offset, then we cannot split
+ // the indexing into an add/sub directly (that TargetConstant may not be
+ // valid for a different type of node, and we cannot convert an opaque
+ // target constant into a regular constant).
+ bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
+ cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
+
+ if (!N->hasAnyUseOfValue(0) &&
+ ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
+ SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+ SDValue Index;
+ if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
+ Index = SplitIndexingFromLoad(LD);
+ // Try to fold the base pointer arithmetic into subsequent loads and
+ // stores.
+ AddUsersToWorklist(N);
+ } else
+ Index = DAG.getUNDEF(N->getValueType(1));
+ LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
+ dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
+ dbgs() << " and 2 other values\n");
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
+ deleteAndRecombine(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // If this load is directly stored, replace the load value with the stored
+ // value.
+ if (auto V = ForwardStoreValueToDirectLoad(LD))
+ return V;
+
+ // Try to infer better alignment information than the load already has.
+ if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
+ SDValue NewLoad = DAG.getExtLoad(
+ LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), LD->getMemoryVT(), Align,
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ // NewLoad will always be N as we are only refining the alignment
+ assert(NewLoad.getNode() == N);
+ (void)NewLoad;
+ }
+ }
+ }
+
+ if (LD->isUnindexed()) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(LD, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDValue ReplLoad;
+
+ // Replace the chain to avoid the dependency.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
+ BetterChain, Ptr, LD->getMemOperand());
+ } else {
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
+ LD->getValueType(0),
+ BetterChain, Ptr, LD->getMemoryVT(),
+ LD->getMemOperand());
+ }
+
+ // Create token factor to keep old chain connected.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
+ MVT::Other, Chain, ReplLoad.getValue(1));
+
+ // Replace uses with load result and token factor
+ return CombineTo(N, ReplLoad.getValue(0), Token);
+ }
+ }
+
+ // Try transforming N to an indexed load.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ // Try to slice up N to more direct loads if the slices are mapped to
+ // different register banks or pairing can take place.
+ if (SliceUpLoad(N))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+namespace {
+
+/// Helper structure used to slice a load into smaller loads.
+/// Basically a slice is obtained from the following sequence:
+/// Origin = load Ty1, Base
+/// Shift = srl Ty1 Origin, CstTy Amount
+/// Inst = trunc Shift to Ty2
+///
+/// Then, it will be rewritten into:
+/// Slice = load SliceTy, Base + SliceOffset
+/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
+///
+/// SliceTy is deduced from the number of bits that are actually used to
+/// build Inst.
+struct LoadedSlice {
+ /// Helper structure used to compute the cost of a slice.
+ struct Cost {
+ /// Are we optimizing for code size.
+ bool ForCodeSize = false;
+
+ /// Various costs.
+ unsigned Loads = 0;
+ unsigned Truncates = 0;
+ unsigned CrossRegisterBanksCopies = 0;
+ unsigned ZExts = 0;
+ unsigned Shift = 0;
+
+ explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
+
+ /// Get the cost of one isolated slice.
+ Cost(const LoadedSlice &LS, bool ForCodeSize)
+ : ForCodeSize(ForCodeSize), Loads(1) {
+ EVT TruncType = LS.Inst->getValueType(0);
+ EVT LoadedType = LS.getLoadedType();
+ if (TruncType != LoadedType &&
+ !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
+ ZExts = 1;
+ }
+
+ /// Account for slicing gain in the current cost.
+ /// Slicing provides a few gains, like removing a shift or a
+ /// truncate. This method allows growing the cost of the original
+ /// load by the gain from this slice.
+ void addSliceGain(const LoadedSlice &LS) {
+ // Each slice saves a truncate.
+ const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
+ if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
+ LS.Inst->getValueType(0)))
+ ++Truncates;
+ // If there is a shift amount, this slice gets rid of it.
+ if (LS.Shift)
+ ++Shift;
+ // If this slice can merge a cross register bank copy, account for it.
+ if (LS.canMergeExpensiveCrossRegisterBankCopy())
+ ++CrossRegisterBanksCopies;
+ }
+
+ Cost &operator+=(const Cost &RHS) {
+ Loads += RHS.Loads;
+ Truncates += RHS.Truncates;
+ CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
+ ZExts += RHS.ZExts;
+ Shift += RHS.Shift;
+ return *this;
+ }
+
+ bool operator==(const Cost &RHS) const {
+ return Loads == RHS.Loads && Truncates == RHS.Truncates &&
+ CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
+ ZExts == RHS.ZExts && Shift == RHS.Shift;
+ }
+
+ bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
+
+ bool operator<(const Cost &RHS) const {
+ // Assume cross-register-bank copies are as expensive as loads.
+ // FIXME: Do we want some more target hooks?
+ unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
+ unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
+ // Unless we are optimizing for code size, consider the
+ // expensive operation first.
+ if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
+ return ExpensiveOpsLHS < ExpensiveOpsRHS;
+ return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
+ (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
+ }
+
+ bool operator>(const Cost &RHS) const { return RHS < *this; }
+
+ bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
+
+ bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
+ };
+
+ // The last instruction that represents the slice. This should be a
+ // truncate instruction.
+ SDNode *Inst;
+
+ // The original load instruction.
+ LoadSDNode *Origin;
+
+ // The right shift amount in bits from the original load.
+ unsigned Shift;
+
+ // The DAG from which Origin came.
+ // This is used to get some contextual information about legal types, etc.
+ SelectionDAG *DAG;
+
+ LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
+ unsigned Shift = 0, SelectionDAG *DAG = nullptr)
+ : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
+
+ /// Get the bits used in a chunk of bits \p BitWidth large.
+ /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
+ /// unused bits set to 0.
+ APInt getUsedBits() const {
+ // Reproduce the trunc(lshr) sequence:
+ // - Start from the truncated value.
+ // - Zero extend to the desired bit width.
+ // - Shift left.
+ assert(Origin && "No original load to compare against.");
+ unsigned BitWidth = Origin->getValueSizeInBits(0);
+ assert(Inst && "This slice is not bound to an instruction");
+ assert(Inst->getValueSizeInBits(0) <= BitWidth &&
+ "Extracted slice is bigger than the whole type!");
+ APInt UsedBits(Inst->getValueSizeInBits(0), 0);
+ UsedBits.setAllBits();
+ UsedBits = UsedBits.zext(BitWidth);
+ UsedBits <<= Shift;
+ return UsedBits;
+ }
+
+ /// Get the size of the slice to be loaded in bytes.
+ unsigned getLoadedSize() const {
+ unsigned SliceSize = getUsedBits().countPopulation();
+ assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
+ return SliceSize / 8;
+ }
+
+ /// Get the type that will be loaded for this slice.
+ /// Note: This may not be the final type for the slice.
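+ /// E.g., a slice covering two bytes is loaded as i16; if the consuming
+ /// truncate produced a wider type, loadSlice() zero-extends the loaded
+ /// value to that final type.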
+ EVT getLoadedType() const {
+ assert(DAG && "Missing context");
+ LLVMContext &Ctxt = *DAG->getContext();
+ return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
+ }
+
+ /// Get the alignment of the load used for this slice.
+ unsigned getAlignment() const {
+ unsigned Alignment = Origin->getAlignment();
+ uint64_t Offset = getOffsetFromBase();
+ if (Offset != 0)
+ Alignment = MinAlign(Alignment, Alignment + Offset);
+ return Alignment;
+ }
+
+ /// Check if this slice can be rewritten with legal operations.
+ bool isLegal() const {
+ // An invalid slice is not legal.
+ if (!Origin || !Inst || !DAG)
+ return false;
+
+ // Offsets are only for indexed loads; we do not handle that.
+ if (!Origin->getOffset().isUndef())
+ return false;
+
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+
+ // Check that the type is legal.
+ EVT SliceType = getLoadedType();
+ if (!TLI.isTypeLegal(SliceType))
+ return false;
+
+ // Check that the load is legal for this type.
+ if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
+ return false;
+
+ // Check that the offset can be computed.
+ // 1. Check its type.
+ EVT PtrType = Origin->getBasePtr().getValueType();
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ return false;
+
+ // 2. Check that it fits in the immediate.
+ if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
+ return false;
+
+ // 3. Check that the computation is legal.
+ if (!TLI.isOperationLegal(ISD::ADD, PtrType))
+ return false;
+
+ // Check that the zext is legal if it needs one.
+ EVT TruncateType = Inst->getValueType(0);
+ if (TruncateType != SliceType &&
+ !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
+ return false;
+
+ return true;
+ }
+
+ /// Get the offset in bytes of this slice in the original chunk of
+ /// bits.
+ /// \pre DAG != nullptr.
+ uint64_t getOffsetFromBase() const {
+ assert(DAG && "Missing context.");
+ bool IsBigEndian = DAG->getDataLayout().isBigEndian();
+ assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
+ uint64_t Offset = Shift / 8;
+ unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
+ assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
+ "The size of the original loaded type is not a multiple of a"
+ " byte.");
+ // If Offset is bigger than TySizeInBytes, it means we are loading all
+ // zeros. This should have been optimized away earlier in the process.
+ assert(TySizeInBytes > Offset &&
+ "Invalid shift amount for given loaded size");
+ if (IsBigEndian)
+ Offset = TySizeInBytes - Offset - getLoadedSize();
+ return Offset;
+ }
+
+ /// Generate the sequence of instructions to load the slice
+ /// represented by this object and redirect the uses of this slice to
+ /// this new sequence of instructions.
+ /// \pre this->Inst && this->Origin are valid Instructions and this
+ /// object passed the legal check: LoadedSlice::isLegal returned true.
+ /// \return The last instruction of the sequence used to load the slice.
+ SDValue loadSlice() const {
+ assert(Inst && Origin && "Unable to replace a non-existing slice.");
+ const SDValue &OldBaseAddr = Origin->getBasePtr();
+ SDValue BaseAddr = OldBaseAddr;
+ // Get the offset in that chunk of bytes w.r.t. the endianness.
+ int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
+ assert(Offset >= 0 && "Offset too big to fit in int64_t!");
+ if (Offset) {
+ // BaseAddr = BaseAddr + Offset.
+ EVT ArithType = BaseAddr.getValueType();
+ SDLoc DL(Origin);
+ BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
+ DAG->getConstant(Offset, DL, ArithType));
+ }
+
+ // Create the type of the loaded slice according to its size.
+ EVT SliceType = getLoadedType();
+
+ // Create the load for the slice.
+ SDValue LastInst =
+ DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
+ Origin->getPointerInfo().getWithOffset(Offset),
+ getAlignment(), Origin->getMemOperand()->getFlags());
+ // If the final type is not the same as the loaded type, this means that
+ // we have to pad with zero. Create a zero extend for that.
+ EVT FinalType = Inst->getValueType(0);
+ if (SliceType != FinalType)
+ LastInst =
+ DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
+ return LastInst;
+ }
+
+ /// Check if this slice can be merged with an expensive cross register
+ /// bank copy. E.g.,
+ /// i = load i32
+ /// f = bitcast i32 i to float
+ bool canMergeExpensiveCrossRegisterBankCopy() const {
+ if (!Inst || !Inst->hasOneUse())
+ return false;
+ SDNode *Use = *Inst->use_begin();
+ if (Use->getOpcode() != ISD::BITCAST)
+ return false;
+ assert(DAG && "Missing context");
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ EVT ResVT = Use->getValueType(0);
+ const TargetRegisterClass *ResRC =
+ TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
+ const TargetRegisterClass *ArgRC =
+ TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
+ Use->getOperand(0)->isDivergent());
+ if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
+ return false;
+
+ // At this point, we know that we perform a cross-register-bank copy.
+ // Check if it is expensive.
+ const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
+ // Assume bitcasts are cheap, unless the register classes do not
+ // explicitly share a common subclass.
+ if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
+ return false;
+
+ // Check if it will be merged with the load.
+ // 1. Check the alignment constraint.
+ unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
+ ResVT.getTypeForEVT(*DAG->getContext()));
+
+ if (RequiredAlignment > getAlignment())
+ return false;
+
+ // 2. Check that the load is a legal operation for that type.
+ if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
+ return false;
+
+ // 3. Check that we do not have a zext in the way.
+ if (Inst->getValueType(0) != getLoadedType())
+ return false;
+
+ return true;
+ }
+};
+
+} // end anonymous namespace
+
+/// Check that all bits set in \p UsedBits form a dense region, i.e.,
+/// \p UsedBits looks like 0..0 1..1 0..0.
+static bool areUsedBitsDense(const APInt &UsedBits) {
+ // If all the bits are one, this is dense!
+ if (UsedBits.isAllOnesValue())
+ return true;
+
+ // Get rid of the unused bits on the right.
+ APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
+ // Get rid of the unused bits on the left.
+ if (NarrowedUsedBits.countLeadingZeros())
+ NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
+ // Check that the chunk of bits is completely used.
+ return NarrowedUsedBits.isAllOnesValue();
+}
+
+/// Check whether or not \p First and \p Second are next to each other
+/// in memory. This means that there is no hole between the bits loaded
+/// by \p First and the bits loaded by \p Second.
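+/// For example, two slices of an i32 load covering bits [0, 16) and
+/// [16, 32) are adjacent, while slices covering [0, 8) and [16, 24)
+/// leave a hole between them.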
+static bool areSlicesNextToEachOther(const LoadedSlice &First,
+ const LoadedSlice &Second) {
+ assert(First.Origin == Second.Origin && First.Origin &&
+ "Unable to match different memory origins.");
+ APInt UsedBits = First.getUsedBits();
+ assert((UsedBits & Second.getUsedBits()) == 0 &&
+ "Slices are not supposed to overlap.");
+ UsedBits |= Second.getUsedBits();
+ return areUsedBitsDense(UsedBits);
+}
+
+/// Adjust the \p GlobalLSCost according to the target
+/// pairing capabilities and the layout of the slices.
+/// \pre \p GlobalLSCost should account for at least as many loads as
+/// there are in the slices in \p LoadedSlices.
+static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
+ LoadedSlice::Cost &GlobalLSCost) {
+ unsigned NumberOfSlices = LoadedSlices.size();
+ // If there are fewer than 2 elements, no pairing is possible.
+ if (NumberOfSlices < 2)
+ return;
+
+ // Sort the slices so that elements that are likely to be next to each
+ // other in memory are next to each other in the list.
+ llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
+ assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
+ return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
+ });
+ const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
+ // First (resp. Second) is the first (resp. second) potential candidate
+ // to be placed in a paired load.
+ const LoadedSlice *First = nullptr;
+ const LoadedSlice *Second = nullptr;
+ for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
+ // Set the beginning of the pair.
+ First = Second) {
+ Second = &LoadedSlices[CurrSlice];
+
+ // If First is NULL, it means we start a new pair.
+ // Get to the next slice.
+ if (!First)
+ continue;
+
+ EVT LoadedType = First->getLoadedType();
+
+ // If the types of the slices are different, we cannot pair them.
+ if (LoadedType != Second->getLoadedType())
+ continue;
+
+ // Check if the target supplies paired loads for this type.
+ unsigned RequiredAlignment = 0;
+ if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
+ // Move to the next pair; this type is hopeless.
+ Second = nullptr;
+ continue;
+ }
+ // Check if we meet the alignment requirement.
+ if (RequiredAlignment > First->getAlignment())
+ continue;
+
+ // Check that both loads are next to each other in memory.
+ if (!areSlicesNextToEachOther(*First, *Second))
+ continue;
+
+ assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
+ --GlobalLSCost.Loads;
+ // Move to the next pair.
+ Second = nullptr;
+ }
+}
+
+/// Check the profitability of all involved LoadedSlices.
+/// Currently, it is considered profitable if there are exactly two
+/// involved slices (1) which are (2) next to each other in memory, and
+/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
+///
+/// Note: The order of the elements in \p LoadedSlices may be modified, but not
+/// the elements themselves.
+///
+/// FIXME: When the cost model is mature enough, we can relax
+/// constraints (1) and (2).
+static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
+ const APInt &UsedBits, bool ForCodeSize) {
+ unsigned NumberOfSlices = LoadedSlices.size();
+ if (StressLoadSlicing)
+ return NumberOfSlices > 1;
+
+ // Check (1).
+ if (NumberOfSlices != 2)
+ return false;
+
+ // Check (2).
+ if (!areUsedBitsDense(UsedBits))
+ return false;
+
+ // Check (3).
+  LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
+  // The original code has one big load.
+  OrigCost.Loads = 1;
+  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
+    const LoadedSlice &LS = LoadedSlices[CurrSlice];
+    // Accumulate the cost of all the slices.
+    LoadedSlice::Cost SliceCost(LS, ForCodeSize);
+    GlobalSlicingCost += SliceCost;
+
+    // Account as cost in the original configuration the gain obtained
+    // with the current slices.
+    OrigCost.addSliceGain(LS);
+  }
+
+  // If the target supports paired load, adjust the cost accordingly.
+  adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
+  return OrigCost > GlobalSlicingCost;
+}
+
+/// If the given load, \p LI, is used only by trunc or trunc(lshr)
+/// operations, split it in the various pieces being extracted.
+///
+/// This sort of thing is introduced by SROA.
+/// This slicing takes care not to insert overlapping loads.
+/// \pre LI is a simple load (i.e., not an atomic or volatile load).
+bool DAGCombiner::SliceUpLoad(SDNode *N) {
+  if (Level < AfterLegalizeDAG)
+    return false;
+
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
+      !LD->getValueType(0).isInteger())
+    return false;
+
+  // Keep track of already used bits to detect overlapping values.
+  // In that case, we will just abort the transformation.
+  APInt UsedBits(LD->getValueSizeInBits(0), 0);
+
+  SmallVector<LoadedSlice, 4> LoadedSlices;
+
+  // Check if this load is used as several smaller chunks of bits.
+  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
+  // of computation for each trunc.
+  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
+       UI != UIEnd; ++UI) {
+    // Skip the uses of the chain.
+    if (UI.getUse().getResNo() != 0)
+      continue;
+
+    SDNode *User = *UI;
+    unsigned Shift = 0;
+
+    // Check if this is a trunc(lshr).
+    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
+        isa<ConstantSDNode>(User->getOperand(1))) {
+      Shift = User->getConstantOperandVal(1);
+      User = *User->use_begin();
+    }
+
+    // At this point, User is a truncate iff we encountered trunc or
+    // trunc(lshr).
+    if (User->getOpcode() != ISD::TRUNCATE)
+      return false;
+
+    // The width of the type must be a power of 2 and at least 8 bits.
+    // Otherwise the load cannot be represented in LLVM IR.
+    // Moreover, if we shifted by a non-multiple of 8 bits, the slice
+    // would span several bytes. We do not support that.
+    unsigned Width = User->getValueSizeInBits(0);
+    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
+      return false;
+
+    // Build the slice for this chain of computations.
+    LoadedSlice LS(User, LD, Shift, &DAG);
+    APInt CurrentUsedBits = LS.getUsedBits();
+
+    // Check if this slice overlaps with another.
+    if ((CurrentUsedBits & UsedBits) != 0)
+      return false;
+    // Update the bits used globally.
+    UsedBits |= CurrentUsedBits;
+
+    // Check if the new slice would be legal.
+    if (!LS.isLegal())
+      return false;
+
+    // Record the slice.
+    LoadedSlices.push_back(LS);
+  }
+
+  // Abort slicing if it does not seem to be profitable.
+  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
+    return false;
+
+  ++SlicedLoads;
+
+  // Rewrite each chain to use an independent load.
+  // By construction, each chain can be represented by a unique load.
+
+  // Prepare the argument for the new token factor for all the slices.
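+  // For instance (editor's illustration), an i32 load at p used as
+  // trunc(i16) and trunc(i16 (lshr 16)) becomes two independent i16 loads
+  // at p and p+2 on a little-endian target; the TokenFactor built below
+  // ties their output chains together so users of the original load's
+  // chain stay correctly ordered.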
+  SmallVector<SDValue, 8> ArgChains;
+  for (SmallVectorImpl<LoadedSlice>::const_iterator
+           LSIt = LoadedSlices.begin(),
+           LSItEnd = LoadedSlices.end();
+       LSIt != LSItEnd; ++LSIt) {
+    SDValue SliceInst = LSIt->loadSlice();
+    CombineTo(LSIt->Inst, SliceInst, true);
+    if (SliceInst.getOpcode() != ISD::LOAD)
+      SliceInst = SliceInst.getOperand(0);
+    assert(SliceInst->getOpcode() == ISD::LOAD &&
+           "It takes more than a zext to get to the loaded slice!!");
+    ArgChains.push_back(SliceInst.getValue(1));
+  }
+
+  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
+                              ArgChains);
+  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+  AddToWorklist(Chain.getNode());
+  return true;
+}
+
+/// Check to see if V is (and load (ptr), imm), where the load has
+/// specific bytes cleared out. If so, return the byte size being masked out
+/// and the shift amount.
+static std::pair<unsigned, unsigned>
+CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
+  std::pair<unsigned, unsigned> Result(0, 0);
+
+  // Check for the structure we're looking for.
+  if (V->getOpcode() != ISD::AND ||
+      !isa<ConstantSDNode>(V->getOperand(1)) ||
+      !ISD::isNormalLoad(V->getOperand(0).getNode()))
+    return Result;
+
+  // Check the chain and pointer.
+  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
+  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
+
+  // This only handles simple types.
+  if (V.getValueType() != MVT::i16 &&
+      V.getValueType() != MVT::i32 &&
+      V.getValueType() != MVT::i64)
+    return Result;
+
+  // Check the constant mask. Invert it so that the bits being masked out are
+  // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
+  // follow the sign bit for uniformity.
+  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
+  unsigned NotMaskLZ = countLeadingZeros(NotMask);
+  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
+  unsigned NotMaskTZ = countTrailingZeros(NotMask);
+  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
+  if (NotMaskLZ == 64) return Result;  // All zero mask.
+
+  // See if we have a continuous run of bits. If so, we have 0*1+0*
+  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
+    return Result;
+
+  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
+  if (V.getValueType() != MVT::i64 && NotMaskLZ)
+    NotMaskLZ -= 64-V.getValueSizeInBits();
+
+  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
+  switch (MaskedBytes) {
+  case 1:
+  case 2:
+  case 4: break;
+  default: return Result;  // All one mask, or 5-byte mask.
+  }
+
+  // Verify that the masked-out region starts at an offset that is a multiple
+  // of its size, so that the access is aligned the same as the access width.
+  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
+
+  // For narrowing to be valid, it must be the case that the load is the
+  // memory operation immediately preceding the store.
+  if (LD == Chain.getNode())
+    ; // ok.
+  else if (Chain->getOpcode() == ISD::TokenFactor &&
+           SDValue(LD, 1).hasOneUse()) {
+    // LD has only 1 chain use so there are no indirect dependencies.
+    if (!LD->isOperandOf(Chain.getNode()))
+      return Result;
+  } else
+    return Result; // Fail.
+
+  Result.first = MaskedBytes;
+  Result.second = NotMaskTZ/8;
+  return Result;
+}
+
+/// Check to see if IVal is something that provides a value as specified by
+/// MaskInfo. If so, replace the specified store with a narrower store of
+/// truncated IVal.
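+/// For example (editor's illustration, little-endian): for
+///   store (or (and (load p), 0xFFFF00FF), IVal), p
+/// CheckForMaskedLoad returns {1, 1} (one byte masked out, at byte offset 1),
+/// and if IVal is known zero outside bits [8, 16), the code below replaces
+/// the store with an i8 store of (trunc (srl IVal, 8)) at p+1.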
+static SDValue +ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, + SDValue IVal, StoreSDNode *St, + DAGCombiner *DC) { + unsigned NumBytes = MaskInfo.first; + unsigned ByteShift = MaskInfo.second; + SelectionDAG &DAG = DC->getDAG(); + + // Check to see if IVal is all zeros in the part being masked in by the 'or' + // that uses this. If not, this is not a replacement. + APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), + ByteShift*8, (ByteShift+NumBytes)*8); + if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue(); + + // Check that it is legal on the target to do this. It is legal if the new + // VT we're shrinking to (i8/i16/i32) is legal or we're still before type + // legalization (and the target doesn't explicitly think this is a bad idea). + MVT VT = MVT::getIntegerVT(NumBytes * 8); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!DC->isTypeLegal(VT)) + return SDValue(); + if (St->getMemOperand() && + !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + *St->getMemOperand())) + return SDValue(); + + // Okay, we can do this! Replace the 'St' store with a store of IVal that is + // shifted by ByteShift and truncated down to NumBytes. + if (ByteShift) { + SDLoc DL(IVal); + IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal, + DAG.getConstant(ByteShift*8, DL, + DC->getShiftAmountTy(IVal.getValueType()))); + } + + // Figure out the offset for the store and the alignment of the access. + unsigned StOffset; + unsigned NewAlign = St->getAlignment(); + + if (DAG.getDataLayout().isLittleEndian()) + StOffset = ByteShift; + else + StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; + + SDValue Ptr = St->getBasePtr(); + if (StOffset) { + SDLoc DL(IVal); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), + Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType())); + NewAlign = MinAlign(NewAlign, StOffset); + } + + // Truncate down to the new size. + IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal); + + ++OpsNarrowed; + return DAG + .getStore(St->getChain(), SDLoc(St), IVal, Ptr, + St->getPointerInfo().getWithOffset(StOffset), NewAlign); +} + +/// Look for sequence of load / op / store where op is one of 'or', 'xor', and +/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try +/// narrowing the load and store if it would end up being a win for performance +/// or code size. +SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); + if (!ST->isSimple()) + return SDValue(); + + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + SDValue Ptr = ST->getBasePtr(); + EVT VT = Value.getValueType(); + + if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) + return SDValue(); + + unsigned Opc = Value.getOpcode(); + + // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst + // is a byte mask indicating a consecutive number of bytes, check to see if + // Y is known to provide just those bytes. If so, we try to replace the + // load + replace + store sequence with a single (narrower) store, which makes + // the load dead. + if (Opc == ISD::OR) { + std::pair<unsigned, unsigned> MaskedLoad; + MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); + if (MaskedLoad.first) + if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + Value.getOperand(1), ST,this)) + return NewST; + + // Or is commutative, so try swapping X and Y. 
+    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
+    if (MaskedLoad.first)
+      if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+                                                          Value.getOperand(0), ST, this))
+        return NewST;
+  }
+
+  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+      Value.getOperand(1).getOpcode() != ISD::Constant)
+    return SDValue();
+
+  SDValue N0 = Value.getOperand(0);
+  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+      Chain == SDValue(N0.getNode(), 1)) {
+    LoadSDNode *LD = cast<LoadSDNode>(N0);
+    if (LD->getBasePtr() != Ptr ||
+        LD->getPointerInfo().getAddrSpace() !=
+            ST->getPointerInfo().getAddrSpace())
+      return SDValue();
+
+    // Find the type to which to narrow the load / op / store.
+    SDValue N1 = Value.getOperand(1);
+    unsigned BitWidth = N1.getValueSizeInBits();
+    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+    if (Opc == ISD::AND)
+      Imm ^= APInt::getAllOnesValue(BitWidth);
+    if (Imm == 0 || Imm.isAllOnesValue())
+      return SDValue();
+    unsigned ShAmt = Imm.countTrailingZeros();
+    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
+    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+    // The narrowing should be profitable, the load/store operation should be
+    // legal (or custom) and the store size should be equal to the NewVT width.
+    while (NewBW < BitWidth &&
+           (NewVT.getStoreSizeInBits() != NewBW ||
+            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
+            !TLI.isNarrowingProfitable(VT, NewVT))) {
+      NewBW = NextPowerOf2(NewBW);
+      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+    }
+    if (NewBW >= BitWidth)
+      return SDValue();
+
+    // If the lsb of the changed bits does not start at a NewBW-bit boundary,
+    // start at the previous one.
+    if (ShAmt % NewBW)
+      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
+                                   std::min(BitWidth, ShAmt + NewBW));
+    if ((Imm & Mask) == Imm) {
+      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+      if (Opc == ISD::AND)
+        NewImm ^= APInt::getAllOnesValue(NewBW);
+      uint64_t PtrOff = ShAmt / 8;
+      // For big endian targets, we need to adjust the offset to the pointer to
+      // load the correct bytes.
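+      // Editor's example: narrowing an i32 op to i8 with ShAmt == 0 gives
+      // PtrOff 0 on little-endian, but (32 + 7 - 8) / 8 - 0 == 3 on
+      // big-endian, since the low byte lives at the highest address there.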
+ if (DAG.getDataLayout().isBigEndian()) + PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; + + unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); + Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); + if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy)) + return SDValue(); + + SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), + Ptr.getValueType(), Ptr, + DAG.getConstant(PtrOff, SDLoc(LD), + Ptr.getValueType())); + SDValue NewLD = + DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, + LD->getPointerInfo().getWithOffset(PtrOff), NewAlign, + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, + DAG.getConstant(NewImm, SDLoc(Value), + NewVT)); + SDValue NewST = + DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, + ST->getPointerInfo().getWithOffset(PtrOff), NewAlign); + + AddToWorklist(NewPtr.getNode()); + AddToWorklist(NewLD.getNode()); + AddToWorklist(NewVal.getNode()); + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); + ++OpsNarrowed; + return NewST; + } + } + + return SDValue(); +} + +/// For a given floating point load / store pair, if the load value isn't used +/// by any other operations, then consider transforming the pair to integer +/// load / store operations if the target deems the transformation profitable. +SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Value = ST->getValue(); + if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && + Value.hasOneUse()) { + LoadSDNode *LD = cast<LoadSDNode>(Value); + EVT VT = LD->getMemoryVT(); + if (!VT.isFloatingPoint() || + VT != ST->getMemoryVT() || + LD->isNonTemporal() || + ST->isNonTemporal() || + LD->getPointerInfo().getAddrSpace() != 0 || + ST->getPointerInfo().getAddrSpace() != 0) + return SDValue(); + + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || + !TLI.isOperationLegal(ISD::STORE, IntVT) || + !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || + !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) + return SDValue(); + + unsigned LDAlign = LD->getAlignment(); + unsigned STAlign = ST->getAlignment(); + Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); + unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy); + if (LDAlign < ABIAlign || STAlign < ABIAlign) + return SDValue(); + + SDValue NewLD = + DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), LDAlign); + + SDValue NewST = + DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(), + ST->getPointerInfo(), STAlign); + + AddToWorklist(NewLD.getNode()); + AddToWorklist(NewST.getNode()); + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); + ++LdStFP2Int; + return NewST; + } + + return SDValue(); +} + +// This is a helper function for visitMUL to check the profitability +// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). +// MulNode is the original multiply, AddNode is (add x, c1), +// and ConstNode is c2. +// +// If the (add x, c1) has multiple uses, we could increase +// the number of adds if we make this transformation. +// It would only be worth doing this if we can remove a +// multiply in the process. Check for that here. +// To illustrate: +// (A + c1) * c3 +// (A + c2) * c3 +// We're checking for cases where we have common "c3 * A" expressions. 
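+// For instance (editor's illustration), with c3 = 5:
+//   t1 = (A + 1) * 5
+//   t2 = (A + 2) * 5
+// distributing both gives (A * 5) + 5 and (A * 5) + 10, and the common
+// subexpression A * 5 is computed once, so the extra adds are paid for
+// by the saved multiply.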
+bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
+                                              SDValue &AddNode,
+                                              SDValue &ConstNode) {
+  APInt Val;
+
+  // If the add only has one use, this would be OK to do.
+  if (AddNode.getNode()->hasOneUse())
+    return true;
+
+  // Walk all the users of the constant with which we're multiplying.
+  for (SDNode *Use : ConstNode->uses()) {
+    if (Use == MulNode) // This use is the one we're on right now. Skip it.
+      continue;
+
+    if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
+      SDNode *OtherOp;
+      SDNode *MulVar = AddNode.getOperand(0).getNode();
+
+      // OtherOp is what we're multiplying against the constant.
+      if (Use->getOperand(0) == ConstNode)
+        OtherOp = Use->getOperand(1).getNode();
+      else
+        OtherOp = Use->getOperand(0).getNode();
+
+      // Check to see if multiply is with the same operand of our "add".
+      //
+      //     ConstNode  = CONST
+      //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
+      //     ...
+      //     AddNode  = (A + c1)  <-- MulVar is A.
+      //         = AddNode * ConstNode   <-- current visiting instruction.
+      //
+      // If we make this transformation, we will have a common
+      // multiply (ConstNode * A) that we can save.
+      if (OtherOp == MulVar)
+        return true;
+
+      // Now check to see if a future expansion will give us a common
+      // multiply.
+      //
+      //     ConstNode  = CONST
+      //     AddNode    = (A + c1)
+      //     ...   = AddNode * ConstNode <-- current visiting instruction.
+      //     ...
+      //     OtherOp = (A + c2)
+      //     Use     = OtherOp * ConstNode <-- visiting Use.
+      //
+      // If we make this transformation, we will have a common
+      // multiply (CONST * A) after we also do the same transformation
+      // to the "Use" instruction.
+      if (OtherOp->getOpcode() == ISD::ADD &&
+          DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
+          OtherOp->getOperand(0).getNode() == MulVar)
+        return true;
+    }
+  }
+
+  // Didn't find a case where this would be profitable.
+  return false;
+}
+
+SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
+                                         unsigned NumStores) {
+  SmallVector<SDValue, 8> Chains;
+  SmallPtrSet<const SDNode *, 8> Visited;
+  SDLoc StoreDL(StoreNodes[0].MemNode);
+
+  for (unsigned i = 0; i < NumStores; ++i) {
+    Visited.insert(StoreNodes[i].MemNode);
+  }
+
+  // Don't include nodes that are children or repeated nodes.
+  for (unsigned i = 0; i < NumStores; ++i) {
+    if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
+      Chains.push_back(StoreNodes[i].MemNode->getChain());
+  }
+
+  assert(Chains.size() > 0 && "Chain should have generated a chain");
+  return DAG.getTokenFactor(StoreDL, Chains);
+}
+
+bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
+    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
+    bool IsConstantSrc, bool UseVector, bool UseTrunc) {
+  // Make sure we have something to merge.
+  if (NumStores < 2)
+    return false;
+
+  // The latest Node in the DAG.
+  SDLoc DL(StoreNodes[0].MemNode);
+
+  int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
+  unsigned SizeInBits = NumStores * ElementSizeBits;
+  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+
+  EVT StoreTy;
+  if (UseVector) {
+    unsigned Elts = NumStores * NumMemElts;
+    // Get the type for the merged vector store.
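+    // E.g. (editor's note), merging four v2i32 stores yields Elts = 8 and
+    // a v8i32 merged store type; for a scalar MemVT, NumMemElts is 1.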
+    StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+  } else
+    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+
+  SDValue StoredVal;
+  if (UseVector) {
+    if (IsConstantSrc) {
+      SmallVector<SDValue, 8> BuildVector;
+      for (unsigned I = 0; I != NumStores; ++I) {
+        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
+        SDValue Val = St->getValue();
+        // If constant is of the wrong type, convert it now.
+        if (MemVT != Val.getValueType()) {
+          Val = peekThroughBitcasts(Val);
+          // Deal with constants of wrong size.
+          if (ElementSizeBits != Val.getValueSizeInBits()) {
+            EVT IntMemVT =
+                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+            if (isa<ConstantFPSDNode>(Val)) {
+              // Not clear how to truncate FP values.
+              return false;
+            } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
+              Val = DAG.getConstant(C->getAPIntValue()
+                                        .zextOrTrunc(Val.getValueSizeInBits())
+                                        .zextOrTrunc(ElementSizeBits),
+                                    SDLoc(C), IntMemVT);
+          }
+          // Make sure the correctly sized type is used.
+          Val = DAG.getBitcast(MemVT, Val);
+        }
+        BuildVector.push_back(Val);
+      }
+      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
+                                               : ISD::BUILD_VECTOR,
+                              DL, StoreTy, BuildVector);
+    } else {
+      SmallVector<SDValue, 8> Ops;
+      for (unsigned i = 0; i < NumStores; ++i) {
+        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+        SDValue Val = peekThroughBitcasts(St->getValue());
+        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
+        // type MemVT. If the underlying value is not the correct
+        // type, but it is an extraction of an appropriate vector we
+        // can recast Val to be of the correct type. This may require
+        // converting between EXTRACT_VECTOR_ELT and
+        // EXTRACT_SUBVECTOR.
+        if ((MemVT != Val.getValueType()) &&
+            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
+          EVT MemVTScalarTy = MemVT.getScalarType();
+          // We may need to add a bitcast here to get types to line up.
+          if (MemVTScalarTy != Val.getValueType().getScalarType()) {
+            Val = DAG.getBitcast(MemVT, Val);
+          } else {
+            unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
+                                            : ISD::EXTRACT_VECTOR_ELT;
+            SDValue Vec = Val.getOperand(0);
+            SDValue Idx = Val.getOperand(1);
+            Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
+          }
+        }
+        Ops.push_back(Val);
+      }
+
+      // Build the extracted vector elements back into a vector.
+      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
+                                               : ISD::BUILD_VECTOR,
+                              DL, StoreTy, Ops);
+    }
+  } else {
+    // We should always use a vector store when merging extracted vector
+    // elements, so this path implies a store of constants.
+    assert(IsConstantSrc && "Merged vector elements should use vector store");
+
+    APInt StoreInt(SizeInBits, 0);
+
+    // Construct a single integer constant which is made of the smaller
+    // constant inputs.
+    bool IsLE = DAG.getDataLayout().isLittleEndian();
+    for (unsigned i = 0; i < NumStores; ++i) {
+      unsigned Idx = IsLE ?
+                         (NumStores - 1 - i) : i;
+      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+
+      SDValue Val = St->getValue();
+      Val = peekThroughBitcasts(Val);
+      StoreInt <<= ElementSizeBits;
+      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+        StoreInt |= C->getAPIntValue()
+                        .zextOrTrunc(ElementSizeBits)
+                        .zextOrTrunc(SizeInBits);
+      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+        StoreInt |= C->getValueAPF()
+                        .bitcastToAPInt()
+                        .zextOrTrunc(ElementSizeBits)
+                        .zextOrTrunc(SizeInBits);
+        // If fp truncation is necessary give up for now.
+        if (MemVT.getSizeInBits() != ElementSizeBits)
+          return false;
+      } else {
+        llvm_unreachable("Invalid constant element type");
+      }
+    }
+
+    // Create the new constant for the merged store.
+    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
+  }
+
+  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
+
+  // Make sure we use a truncating store if that's necessary to be legal.
+  SDValue NewStore;
+  if (!UseTrunc) {
+    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
+                            FirstInChain->getPointerInfo(),
+                            FirstInChain->getAlignment());
+  } else { // Must be realized as a trunc store
+    EVT LegalizedStoredValTy =
+        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
+    unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
+    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
+    SDValue ExtendedStoreVal =
+        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
+                        LegalizedStoredValTy);
+    NewStore = DAG.getTruncStore(
+        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
+        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
+        FirstInChain->getAlignment(),
+        FirstInChain->getMemOperand()->getFlags());
+  }
+
+  // Replace all merged stores with the new store.
+  for (unsigned i = 0; i < NumStores; ++i)
+    CombineTo(StoreNodes[i].MemNode, NewStore);
+
+  AddToWorklist(NewChain.getNode());
+  return true;
+}
+
+void DAGCombiner::getStoreMergeCandidates(
+    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
+    SDNode *&RootNode) {
+  // This holds the base pointer, index, and the offset in bytes from the base
+  // pointer.
+  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
+  EVT MemVT = St->getMemoryVT();
+
+  SDValue Val = peekThroughBitcasts(St->getValue());
+  // We must have a base and an offset.
+  if (!BasePtr.getBase().getNode())
+    return;
+
+  // Do not handle stores to undef base pointers.
+  if (BasePtr.getBase().isUndef())
+    return;
+
+  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
+  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+                          Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
+  bool IsLoadSrc = isa<LoadSDNode>(Val);
+  BaseIndexOffset LBasePtr;
+  // Match on loadbaseptr if relevant.
+  EVT LoadVT;
+  if (IsLoadSrc) {
+    auto *Ld = cast<LoadSDNode>(Val);
+    LBasePtr = BaseIndexOffset::match(Ld, DAG);
+    LoadVT = Ld->getMemoryVT();
+    // Load and store should be the same type.
+    if (MemVT != LoadVT)
+      return;
+    // Loads must only have one use.
+    if (!Ld->hasNUsesOfValue(1, 0))
+      return;
+    // The memory operands must not be volatile/indexed/atomic.
+    // TODO: May be able to relax for unordered atomics (see D66309)
+    if (!Ld->isSimple() || Ld->isIndexed())
+      return;
+  }
+  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
+                            int64_t &Offset) -> bool {
+    // The memory operands must not be volatile/indexed/atomic.
+    // TODO: May be able to relax for unordered atomics (see D66309)
+    if (!Other->isSimple() || Other->isIndexed())
+      return false;
+    // Don't mix temporal stores with non-temporal stores.
+    if (St->isNonTemporal() != Other->isNonTemporal())
+      return false;
+    SDValue OtherBC = peekThroughBitcasts(Other->getValue());
+    // Allow merging constants of different types as integers.
+    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
+                                           : Other->getMemoryVT() != MemVT;
+    if (IsLoadSrc) {
+      if (NoTypeMatch)
+        return false;
+      // The Load's Base Ptr must also match.
+      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
+        BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
+        if (LoadVT != OtherLd->getMemoryVT())
+          return false;
+        // Loads must only have one use.
+        if (!OtherLd->hasNUsesOfValue(1, 0))
+          return false;
+        // The memory operands must not be volatile/indexed/atomic.
+        // TODO: May be able to relax for unordered atomics (see D66309)
+        if (!OtherLd->isSimple() ||
+            OtherLd->isIndexed())
+          return false;
+        // Don't mix temporal loads with non-temporal loads.
+        if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
+          return false;
+        if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
+          return false;
+      } else
+        return false;
+    }
+    if (IsConstantSrc) {
+      if (NoTypeMatch)
+        return false;
+      if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
+        return false;
+    }
+    if (IsExtractVecSrc) {
+      // Do not merge truncated stores here.
+      if (Other->isTruncatingStore())
+        return false;
+      if (!MemVT.bitsEq(OtherBC.getValueType()))
+        return false;
+      if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+          OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+        return false;
+    }
+    Ptr = BaseIndexOffset::match(Other, DAG);
+    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
+  };
+
+  // Check if the pair of StoreNode and RootNode has already bailed out of
+  // the dependence check more times than the limit.
+  auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
+                                        SDNode *RootNode) -> bool {
+    auto RootCount = StoreRootCountMap.find(StoreNode);
+    if (RootCount != StoreRootCountMap.end() &&
+        RootCount->second.first == RootNode &&
+        RootCount->second.second > StoreMergeDependenceLimit)
+      return true;
+    return false;
+  };
+
+  // We are looking for a root node which is an ancestor of all mergeable
+  // stores. We search up through a load, to our root and then down
+  // through all children. For instance, we will find Store{1,2,3} if
+  // St is Store1, Store2, or Store3 where the root is not a load,
+  // which is always true for non-volatile ops. TODO: Expand
+  // the search to find all valid candidates through multiple layers of loads.
+  //
+  // Root
+  // |-------|-------|
+  // Load    Load    Store3
+  // |       |
+  // Store1  Store2
+  //
+  // FIXME: We should be able to climb and
+  // descend TokenFactors to find candidates as well.
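+  //
+  // Editor's walk-through of the diagram above: starting at Store1, the
+  // chain is a Load, so RootNode becomes that load's chain (Root); the
+  // loops below then visit Root's chain uses (Load, Load, Store3) and,
+  // through each load, the loads' chain uses (Store1, Store2), so all
+  // three stores are offered to CandidateMatch.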
+
+  RootNode = St->getChain().getNode();
+
+  unsigned NumNodesExplored = 0;
+  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
+    RootNode = Ldn->getChain().getNode();
+    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
+         I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
+      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
+        for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
+          if (I2.getOperandNo() == 0)
+            if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
+              BaseIndexOffset Ptr;
+              int64_t PtrDiff;
+              if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
+                  !OverLimitInDependenceCheck(OtherST, RootNode))
+                StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
+            }
+  } else
+    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
+         I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
+      if (I.getOperandNo() == 0)
+        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
+          BaseIndexOffset Ptr;
+          int64_t PtrDiff;
+          if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
+              !OverLimitInDependenceCheck(OtherST, RootNode))
+            StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
+        }
+}
+
+// We need to check that merging these stores does not cause a loop in
+// the DAG. Any store candidate may depend on another candidate
+// indirectly through its operand (we already consider dependencies
+// through the chain). Check in parallel by searching up from
+// non-chain operands of candidates.
+bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
+    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
+    SDNode *RootNode) {
+  // FIXME: We should be able to truncate a full search of
+  // predecessors by doing a BFS and keeping tabs on the originating
+  // stores from which worklist nodes come, in a similar way to
+  // TokenFactor simplification.
+
+  SmallPtrSet<const SDNode *, 32> Visited;
+  SmallVector<const SDNode *, 8> Worklist;
+
+  // RootNode is a predecessor to all candidates so we need not search
+  // past it. Add RootNode (peeking through TokenFactors). Do not count
+  // these towards size check.
+
+  Worklist.push_back(RootNode);
+  while (!Worklist.empty()) {
+    auto N = Worklist.pop_back_val();
+    if (!Visited.insert(N).second)
+      continue; // Already present in Visited.
+    if (N->getOpcode() == ISD::TokenFactor) {
+      for (SDValue Op : N->ops())
+        Worklist.push_back(Op.getNode());
+    }
+  }
+
+  // Don't count pruning nodes towards max.
+  unsigned int Max = 1024 + Visited.size();
+  // Search Ops of store candidates.
+  for (unsigned i = 0; i < NumStores; ++i) {
+    SDNode *N = StoreNodes[i].MemNode;
+    // Of the 4 Store Operands:
+    //   * Chain (Op 0) -> We have already considered these
+    //                     in candidate selection and can be
+    //                     safely ignored
+    //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
+    //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
+    //                       but aren't necessarily from the same base node, so
+    //                       cycles possible (e.g. via indexed store).
+    //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
+    //               non-indexed stores). Not constant on all targets (e.g. ARM)
+    //               and so can participate in a cycle.
+    for (unsigned j = 1; j < N->getNumOperands(); ++j)
+      Worklist.push_back(N->getOperand(j).getNode());
+  }
+  // Search through DAG. We can stop early if we find a store node.
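+  // (Editor's note: if any candidate store is reachable as a predecessor of
+  // another candidate's value or address operands, merging would create a
+  // cycle, so the helper below must reject the whole set.)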
+  for (unsigned i = 0; i < NumStores; ++i)
+    if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
+                                     Max)) {
+      // If the search bails out, record the StoreNode and RootNode in the
+      // StoreRootCountMap. If we have seen the pair more times than the
+      // limit, we won't add the StoreNode into the StoreNodes set again.
+      if (Visited.size() >= Max) {
+        auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
+        if (RootCount.first == RootNode)
+          RootCount.second++;
+        else
+          RootCount = {RootNode, 1};
+      }
+      return false;
+    }
+  return true;
+}
+
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
+  if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
+    return false;
+
+  EVT MemVT = St->getMemoryVT();
+  int64_t ElementSizeBytes = MemVT.getStoreSize();
+  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+
+  if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
+    return false;
+
+  bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
+      Attribute::NoImplicitFloat);
+
+  // This function cannot currently deal with non-byte-sized memory sizes.
+  if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
+    return false;
+
+  if (!MemVT.isSimple())
+    return false;
+
+  // Perform an early exit check. Do not bother looking at stored values that
+  // are not constants, loads, or extracted vector elements.
+  SDValue StoredVal = peekThroughBitcasts(St->getValue());
+  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
+  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
+                       isa<ConstantFPSDNode>(StoredVal);
+  bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+                          StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
+  bool IsNonTemporalStore = St->isNonTemporal();
+  bool IsNonTemporalLoad =
+      IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal();
+
+  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
+    return false;
+
+  SmallVector<MemOpLink, 8> StoreNodes;
+  SDNode *RootNode;
+  // Find potential store merge candidates by searching through the chain
+  // sub-DAG.
+  getStoreMergeCandidates(St, StoreNodes, RootNode);
+
+  // Check if there is anything to merge.
+  if (StoreNodes.size() < 2)
+    return false;
+
+  // Sort the memory operands according to their distance from the
+  // base pointer.
+  llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
+    return LHS.OffsetFromBase < RHS.OffsetFromBase;
+  });
+
+  // Store merging attempts to merge the lowest stores. This generally
+  // works out, as the remaining stores are checked after the first
+  // collection of stores is merged. However, in the case that a
+  // non-mergeable store is found first, e.g., {p[-2], p[0], p[1], p[2],
+  // p[3]}, we would fail and miss the subsequent mergeable cases. To
+  // prevent this, we prune such stores from the front of StoreNodes here.
+
+  bool RV = false;
+  while (StoreNodes.size() > 1) {
+    size_t StartIdx = 0;
+    while ((StartIdx + 1 < StoreNodes.size()) &&
+           StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
+               StoreNodes[StartIdx + 1].OffsetFromBase)
+      ++StartIdx;
+
+    // Bail if we don't have enough candidates to merge.
+    if (StartIdx + 1 >= StoreNodes.size())
+      return RV;
+
+    if (StartIdx)
+      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
+
+    // Scan the memory operations on the chain and find the first
+    // non-consecutive store memory address.
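+    // Editor's example: with 4-byte elements at offsets {0, 4, 8, 20}, the
+    // scan below keeps three consecutive stores and stops at 20, since
+    // 20 - 0 != 4 * 3.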
+ unsigned NumConsecutiveStores = 1; + int64_t StartAddress = StoreNodes[0].OffsetFromBase; + // Check that the addresses are consecutive starting from the second + // element in the list of stores. + for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) { + int64_t CurrAddress = StoreNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + NumConsecutiveStores = i + 1; + } + + if (NumConsecutiveStores < 2) { + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumConsecutiveStores); + continue; + } + + // The node with the lowest store address. + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); + + // Store the constants into memory as one consecutive store. + if (IsConstantSrc) { + while (NumConsecutiveStores >= 2) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned LastLegalType = 1; + unsigned LastLegalVectorType = 1; + bool LastIntegerTrunc = false; + bool NonZero = false; + unsigned FirstZeroAfterNonZero = NumConsecutiveStores; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue StoredVal = ST->getValue(); + bool IsElementZero = false; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) + IsElementZero = C->isNullValue(); + else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) + IsElementZero = C->getConstantFPValue()->isNullValue(); + if (IsElementZero) { + if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) + FirstZeroAfterNonZero = i; + } + NonZero |= !IsElementZero; + + // Find a legal type for the constant store. + unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); + bool IsFast = false; + + // Break early when size is too large to be legal. + if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) + break; + + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) { + LastIntegerTrunc = false; + LastLegalType = i + 1; + // Or check whether a truncstore is legal. + } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValTy = + TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); + if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), + &IsFast) && + IsFast) { + LastIntegerTrunc = true; + LastLegalType = i + 1; + } + } + + // We only use vectors if the constant is known to be zero or the + // target allows it and the function is not marked with the + // noimplicitfloat attribute. + if ((!NonZero || + TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && + !NoVectors) { + // Find a legal type for the vector store. 
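+          // E.g. (editor's note), for i + 1 == 4 scalar i32 stores this
+          // tries v4i32; this vector path is taken only for all-zero values
+          // or when the target says a vector constant store is cheap.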
+ unsigned Elts = (i + 1) * NumMemElts; + EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.allowsMemoryAccess( + Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) && + IsFast) + LastLegalVectorType = i + 1; + } + } + + bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; + unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; + + // Check if we found a legal integer type that creates a meaningful + // merge. + if (NumElem < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved or we've dropped a non-zero value. Drop as many + // candidates as we can here. + unsigned NumSkip = 1; + while ( + (NumSkip < NumConsecutiveStores) && + (NumSkip < FirstZeroAfterNonZero) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } + + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, + RootNode)) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + continue; + } + + RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true, + UseVector, LastIntegerTrunc); + + // Remove merged stores for next iteration. + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + } + continue; + } + + // When extracting multiple vector elements, try to store them + // in one vector store rather than a sequence of scalar stores. + if (IsExtractVecSrc) { + // Loop on Consecutive Stores on success. + while (NumConsecutiveStores >= 2) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned NumStoresToMerge = 1; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT Ty = + EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); + bool IsFast; + + // Break early when size is too large to be legal. + if (Ty.getSizeInBits() > MaximumLegalStoreInBits) + break; + + if (TLI.isTypeLegal(Ty) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.allowsMemoryAccess(Context, DL, Ty, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) + NumStoresToMerge = i + 1; + } + + // Check if we found a legal integer type creating a meaningful + // merge. + if (NumStoresToMerge < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved. Drop as many candidates as we can here. 
+ unsigned NumSkip = 1; + while ( + (NumSkip < NumConsecutiveStores) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } + + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies( + StoreNodes, NumStoresToMerge, RootNode)) { + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumStoresToMerge); + NumConsecutiveStores -= NumStoresToMerge; + continue; + } + + RV |= MergeStoresOfConstantsOrVecElts( + StoreNodes, MemVT, NumStoresToMerge, false, true, false); + + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumStoresToMerge); + NumConsecutiveStores -= NumStoresToMerge; + } + continue; + } + + // Below we handle the case of multiple consecutive stores that + // come from multiple consecutive loads. We merge them into a single + // wide load and a single wide store. + + // Look for load nodes which are used by the stored values. + SmallVector<MemOpLink, 8> LoadNodes; + + // Find acceptable loads. Loads need to have the same chain (token factor), + // must not be zext, volatile, indexed, and they must be consecutive. + BaseIndexOffset LdBasePtr; + + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue Val = peekThroughBitcasts(St->getValue()); + LoadSDNode *Ld = cast<LoadSDNode>(Val); + + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); + // If this is not the first ptr that we check. + int64_t LdOffset = 0; + if (LdBasePtr.getBase().getNode()) { + // The base ptr must be the same. + if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset)) + break; + } else { + // Check that all other base pointers are the same as this one. + LdBasePtr = LdPtr; + } + + // We found a potential memory operand to merge. + LoadNodes.push_back(MemOpLink(Ld, LdOffset)); + } + + while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { + // If we have load/store pair instructions and we only have two values, + // don't bother merging. + unsigned RequiredAlignment; + if (LoadNodes.size() == 2 && + TLI.hasPairedLoad(MemVT, RequiredAlignment) && + StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2); + break; + } + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); + unsigned FirstLoadAlign = FirstLoad->getAlignment(); + + // Scan the memory operations on the chain and find the first + // non-consecutive load memory address. These variables hold the index in + // the store node array. + + unsigned LastConsecutiveLoad = 1; + + // This variable refers to the size and not index in the array. + unsigned LastLegalVectorType = 1; + unsigned LastLegalIntegerType = 1; + bool isDereferenceable = true; + bool DoIntegerTruncate = false; + StartAddress = LoadNodes[0].OffsetFromBase; + SDValue FirstChain = FirstLoad->getChain(); + for (unsigned i = 1; i < LoadNodes.size(); ++i) { + // All loads must share the same chain. 
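+        // (Editor's note: requiring one common chain means no other memory
+        // operation can be interleaved between these loads, so folding them
+        // into a single wide load cannot change what is read.)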
+        if (LoadNodes[i].MemNode->getChain() != FirstChain)
+          break;
+
+        int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
+        if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+          break;
+        LastConsecutiveLoad = i;
+
+        if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
+          isDereferenceable = false;
+
+        // Find a legal type for the vector store.
+        unsigned Elts = (i + 1) * NumMemElts;
+        EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+
+        // Break early when size is too large to be legal.
+        if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
+          break;
+
+        bool IsFastSt, IsFastLd;
+        if (TLI.isTypeLegal(StoreTy) &&
+            TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+            TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                   *FirstInChain->getMemOperand(), &IsFastSt) &&
+            IsFastSt &&
+            TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                   *FirstLoad->getMemOperand(), &IsFastLd) &&
+            IsFastLd) {
+          LastLegalVectorType = i + 1;
+        }
+
+        // Find a legal type for the integer store.
+        unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+        StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+        if (TLI.isTypeLegal(StoreTy) &&
+            TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+            TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                   *FirstInChain->getMemOperand(), &IsFastSt) &&
+            IsFastSt &&
+            TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                   *FirstLoad->getMemOperand(), &IsFastLd) &&
+            IsFastLd) {
+          LastLegalIntegerType = i + 1;
+          DoIntegerTruncate = false;
+          // Or check whether a truncstore and extload is legal.
+        } else if (TLI.getTypeAction(Context, StoreTy) ==
+                   TargetLowering::TypePromoteInteger) {
+          EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
+          if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
+              TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+              TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
+                                 StoreTy) &&
+              TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
+                                 StoreTy) &&
+              TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
+              TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                     *FirstInChain->getMemOperand(),
+                                     &IsFastSt) &&
+              IsFastSt &&
+              TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                     *FirstLoad->getMemOperand(), &IsFastLd) &&
+              IsFastLd) {
+            LastLegalIntegerType = i + 1;
+            DoIntegerTruncate = true;
+          }
+        }
+      }
+
+      // Only use vector types if the vector type is larger than the integer
+      // type. If they are the same, use integers.
+      bool UseVectorTy =
+          LastLegalVectorType > LastLegalIntegerType && !NoVectors;
+      unsigned LastLegalType =
+          std::max(LastLegalVectorType, LastLegalIntegerType);
+
+      // We add +1 here because the LastXXX variables refer to location while
+      // the NumElem refers to array/index size.
+      unsigned NumElem =
+          std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
+      NumElem = std::min(LastLegalType, NumElem);
+
+      if (NumElem < 2) {
+        // We know that candidate stores are in order and of correct
+        // shape. While there is no mergeable sequence from the
+        // beginning one may start later in the sequence. The only
+        // reason a merge of size N could have failed where another of
+        // the same size would not have is if the alignment of either
+        // the load or store has improved. Drop as many candidates as we
+        // can here.
+        unsigned NumSkip = 1;
+        while ((NumSkip < LoadNodes.size()) &&
+               (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
+               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+          NumSkip++;
+        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
+        NumConsecutiveStores -= NumSkip;
+        continue;
+      }
+
+      // Check that we can merge these candidates without causing a cycle.
+      if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
+                                                    RootNode)) {
+        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+        NumConsecutiveStores -= NumElem;
+        continue;
+      }
+
+      // Find if it is better to use vectors or integers to load and store
+      // to memory.
+      EVT JointMemOpVT;
+      if (UseVectorTy) {
+        // Find a legal type for the vector store.
+        unsigned Elts = NumElem * NumMemElts;
+        JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+      } else {
+        unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+        JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
+      }
+
+      SDLoc LoadDL(LoadNodes[0].MemNode);
+      SDLoc StoreDL(StoreNodes[0].MemNode);
+
+      // The merged loads are required to have the same incoming chain, so
+      // using the first's chain is acceptable.
+
+      SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
+      AddToWorklist(NewStoreChain.getNode());
+
+      MachineMemOperand::Flags LdMMOFlags =
+          isDereferenceable ? MachineMemOperand::MODereferenceable
+                            : MachineMemOperand::MONone;
+      if (IsNonTemporalLoad)
+        LdMMOFlags |= MachineMemOperand::MONonTemporal;
+
+      MachineMemOperand::Flags StMMOFlags =
+          IsNonTemporalStore ? MachineMemOperand::MONonTemporal
+                             : MachineMemOperand::MONone;
+
+      SDValue NewLoad, NewStore;
+      if (UseVectorTy || !DoIntegerTruncate) {
+        NewLoad =
+            DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
+                        FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
+                        FirstLoadAlign, LdMMOFlags);
+        NewStore = DAG.getStore(
+            NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
+            FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
+      } else { // This must be the truncstore/extload case
+        EVT ExtendedTy =
+            TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
+        NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
+                                 FirstLoad->getChain(), FirstLoad->getBasePtr(),
+                                 FirstLoad->getPointerInfo(), JointMemOpVT,
+                                 FirstLoadAlign, LdMMOFlags);
+        NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
+                                     FirstInChain->getBasePtr(),
+                                     FirstInChain->getPointerInfo(),
+                                     JointMemOpVT, FirstInChain->getAlignment(),
+                                     FirstInChain->getMemOperand()->getFlags());
+      }
+
+      // Transfer chain users from old loads to the new load.
+      for (unsigned i = 0; i < NumElem; ++i) {
+        LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+                                      SDValue(NewLoad.getNode(), 1));
+      }
+
+      // Replace all the stores with the new store. Recursively remove the
+      // corresponding value if it is no longer used.
+      for (unsigned i = 0; i < NumElem; ++i) {
+        SDValue Val = StoreNodes[i].MemNode->getOperand(1);
+        CombineTo(StoreNodes[i].MemNode, NewStore);
+        if (Val.getNode()->use_empty())
+          recursivelyDeleteUnusedNodes(Val.getNode());
+      }
+
+      RV = true;
+      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+      LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+      NumConsecutiveStores -= NumElem;
+    }
+  }
+  return RV;
+}
+
+SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
+  SDLoc SL(ST);
+  SDValue ReplStore;
+
+  // Replace the chain to avoid dependency.
+  if (ST->isTruncatingStore()) {
+    ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
+                                  ST->getBasePtr(), ST->getMemoryVT(),
+                                  ST->getMemOperand());
+  } else {
+    ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
+                             ST->getMemOperand());
+  }
+
+  // Create token to keep both nodes around.
+  SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
+                              MVT::Other, ST->getChain(), ReplStore);
+
+  // Make sure the new and old chains are cleaned up.
+  AddToWorklist(Token.getNode());
+
+  // Don't add users to work list.
+  return CombineTo(ST, Token, false);
+}
+
+SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
+  SDValue Value = ST->getValue();
+  if (Value.getOpcode() == ISD::TargetConstantFP)
+    return SDValue();
+
+  SDLoc DL(ST);
+
+  SDValue Chain = ST->getChain();
+  SDValue Ptr = ST->getBasePtr();
+
+  const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
+
+  // NOTE: If the original store is volatile, this transform must not increase
+  // the number of stores. For example, on x86-32 an f64 can be stored in one
+  // processor operation but an i64 (which is not legal) requires two. So the
+  // transform should not be done in this case.
+
+  SDValue Tmp;
+  switch (CFP->getSimpleValueType(0).SimpleTy) {
+  default:
+    llvm_unreachable("Unknown FP type");
+  case MVT::f16: // We don't do this for these yet.
+  case MVT::f80:
+  case MVT::f128:
+  case MVT::ppcf128:
+    return SDValue();
+  case MVT::f32:
+    if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
+        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+      Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
+                              bitcastToAPInt().getZExtValue(), SDLoc(CFP),
+                            MVT::i32);
+      return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
+    }
+
+    return SDValue();
+  case MVT::f64:
+    if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
+         ST->isSimple()) ||
+        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+      Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+                              getZExtValue(), SDLoc(CFP), MVT::i64);
+      return DAG.getStore(Chain, DL, Tmp,
+                          Ptr, ST->getMemOperand());
+    }
+
+    if (ST->isSimple() &&
+        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+      // Many FP stores are not made apparent until after legalize, e.g. for
+      // argument passing. Since this is so common, custom legalize the
+      // 64-bit integer store into two 32-bit stores.
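+      // Editor's example: storing f64 1.0 (bits 0x3FF0000000000000) emits
+      // i32 0x00000000 at Ptr and i32 0x3FF00000 at Ptr+4 on little-endian
+      // targets; the two halves are swapped on big-endian targets.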
+      uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+      SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
+      SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
+      if (DAG.getDataLayout().isBigEndian())
+        std::swap(Lo, Hi);
+
+      unsigned Alignment = ST->getAlignment();
+      MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+      AAMDNodes AAInfo = ST->getAAInfo();
+
+      SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
+                                 ST->getAlignment(), MMOFlags, AAInfo);
+      Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+                        DAG.getConstant(4, DL, Ptr.getValueType()));
+      Alignment = MinAlign(Alignment, 4U);
+      SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
+                                 ST->getPointerInfo().getWithOffset(4),
+                                 Alignment, MMOFlags, AAInfo);
+      return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+                         St0, St1);
+    }
+
+    return SDValue();
+  }
+}
+
+SDValue DAGCombiner::visitSTORE(SDNode *N) {
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  SDValue Chain = ST->getChain();
+  SDValue Value = ST->getValue();
+  SDValue Ptr = ST->getBasePtr();
+
+  // If this is a store of a bit convert, store the input value if the
+  // resultant store does not need a higher alignment than the original.
+  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
+      ST->isUnindexed()) {
+    EVT SVT = Value.getOperand(0).getValueType();
+    // If the store is volatile, we only want to change the store type if the
+    // resulting store is legal. Otherwise we might increase the number of
+    // memory accesses. We don't care if the original type was legal or not
+    // as we assume software couldn't rely on the number of accesses of an
+    // illegal type.
+    // TODO: May be able to relax for unordered atomics (see D66309)
+    if (((!LegalOperations && ST->isSimple()) ||
+         TLI.isOperationLegal(ISD::STORE, SVT)) &&
+        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
+                                     DAG, *ST->getMemOperand())) {
+      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+                          ST->getPointerInfo(), ST->getAlignment(),
+                          ST->getMemOperand()->getFlags(), ST->getAAInfo());
+    }
+  }
+
+  // Turn 'store undef, Ptr' -> nothing.
+  if (Value.isUndef() && ST->isUnindexed())
+    return Chain;
+
+  // Try to infer better alignment information than the store already has.
+  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+      if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
+        SDValue NewStore =
+            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
+                              ST->getMemoryVT(), Align,
+                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
+        // NewStore will always be N as we are only refining the alignment.
+        assert(NewStore.getNode() == N);
+        (void)NewStore;
+      }
+    }
+  }
+
+  // Try transforming a pair of floating point load / store ops to integer
+  // load / store ops.
+  if (SDValue NewST = TransformFPLoadStorePair(N))
+    return NewST;
+
+  // Try transforming several stores into STORE (BSWAP).
+  if (SDValue Store = MatchStoreCombine(ST))
+    return Store;
+
+  if (ST->isUnindexed()) {
+    // Walk up chain skipping non-aliasing memory nodes, on this store and any
+    // adjacent stores.
+    if (findBetterNeighborChains(ST)) {
+      // replaceStoreChain uses CombineTo, which handled all of the worklist
+      // manipulation. Return the original node to not do anything else.
+      return SDValue(ST, 0);
+    }
+    Chain = ST->getChain();
+  }
+
+  // FIXME: Is there such a thing as a truncating indexed store?
+  if (ST->isTruncatingStore() && ST->isUnindexed() &&
+      Value.getValueType().isInteger() &&
+      (!isa<ConstantSDNode>(Value) ||
+       !cast<ConstantSDNode>(Value)->isOpaque())) {
+    APInt TruncDemandedBits =
+        APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+                             ST->getMemoryVT().getScalarSizeInBits());
+
+    // See if we can simplify the input to this truncstore with knowledge that
+    // only the low bits are being used. For example:
+    // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
+    AddToWorklist(Value.getNode());
+    if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
+      return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
+                               ST->getMemOperand());
+
+    // Otherwise, see if we can simplify the operation with
+    // SimplifyDemandedBits, which only works if the value has a single use.
+    if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
+      // Re-visit the store if anything changed and the store hasn't been
+      // merged with another node (in which case N is deleted).
+      // SimplifyDemandedBits will add Value's node back to the worklist if
+      // necessary, but we also need to re-visit the Store node itself.
+      if (N->getOpcode() != ISD::DELETED_NODE)
+        AddToWorklist(N);
+      return SDValue(N, 0);
+    }
+  }
+
+  // If this is a load followed by a store to the same location, then the store
+  // is dead/noop.
+  // TODO: Can relax for unordered atomics (see D66309)
+  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
+    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
+        ST->isUnindexed() && ST->isSimple() &&
+        // There can't be any side effects between the load and store, such as
+        // a call or store.
+        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
+      // The store is dead, remove it.
+      return Chain;
+    }
+  }
+
+  // TODO: Can relax for unordered atomics (see D66309)
+  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
+    if (ST->isUnindexed() && ST->isSimple() &&
+        ST1->isUnindexed() && ST1->isSimple()) {
+      if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
+          ST->getMemoryVT() == ST1->getMemoryVT()) {
+        // If this is a store followed by a store with the same value to the
+        // same location, then the store is dead/noop.
+        return Chain;
+      }
+
+      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
+          !ST1->getBasePtr().isUndef()) {
+        const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
+        const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
+        unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
+        unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
+        // If the preceding store ST1 writes to a subset of this store's
+        // location and no other node is chained to it, the preceding store
+        // is dead and can effectively be dropped. Do not remove stores to
+        // undef as they may be used as data sinks.
+        if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
+          CombineTo(ST1, ST1->getChain());
+          return SDValue();
+        }
+
+        // If ST stores to a subset of the preceding store's write set, we may
+        // be able to fold ST's value into the preceding stored value. As we
+        // know the other uses of ST1's chain are unconcerned with ST, this
+        // folding will not affect those nodes.
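+        // For example, on a little-endian target,
+        //   ST1 = (store i32 0x11223344, p) followed by
+        //   ST  = (store i8 0xAA, p)
+        // folds into a single (store i32 0x112233AA, p).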
+ int64_t BitOffset; + if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize, + BitOffset)) { + SDValue ChainValue = ST1->getValue(); + if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) { + if (auto *C = dyn_cast<ConstantSDNode>(Value)) { + APInt Val = C1->getAPIntValue(); + APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize); + // FIXME: Handle Big-endian mode. + if (!DAG.getDataLayout().isBigEndian()) { + Val.insertBits(InsertVal, BitOffset); + SDValue NewSDVal = + DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(), + C1->isTargetOpcode(), C1->isOpaque()); + SDNode *NewST1 = DAG.UpdateNodeOperands( + ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2), + ST1->getOperand(3)); + return CombineTo(ST, SDValue(NewST1, 0)); + } + } + } + } // End ST subset of ST1 case. + } + } + } + + // If this is an FP_ROUND or TRUNC followed by a store, fold this into a + // truncating store. We can do this even if this is already a truncstore. + if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) + && Value.getNode()->hasOneUse() && ST->isUnindexed() && + TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), + ST->getMemoryVT())) { + return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), + Ptr, ST->getMemoryVT(), ST->getMemOperand()); + } + + // Always perform this optimization before types are legal. If the target + // prefers, also try this after legalization to catch stores that were created + // by intrinsics or other nodes. + if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) { + while (true) { + // There can be multiple store sequences on the same chain. + // Keep trying to merge store sequences until we are unable to do so + // or until we merge the last store on the chain. + bool Changed = MergeConsecutiveStores(ST); + if (!Changed) break; + // Return N as merge only uses CombineTo and no worklist clean + // up is necessary. + if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N)) + return SDValue(N, 0); + } + } + + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + // + // Make sure to do this only after attempting to merge stores in order to + // avoid changing the types of some subset of stores due to visit order, + // preventing their merging. + if (isa<ConstantFPSDNode>(ST->getValue())) { + if (SDValue NewSt = replaceStoreOfFPConstant(ST)) + return NewSt; + } + + if (SDValue NewSt = splitMergedValStore(ST)) + return NewSt; + + return ReduceLoadOpStoreWidth(N); +} + +SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { + const auto *LifetimeEnd = cast<LifetimeSDNode>(N); + if (!LifetimeEnd->hasOffset()) + return SDValue(); + + const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(), + LifetimeEnd->getOffset(), false); + + // We walk up the chains to find stores. + SmallVector<SDValue, 8> Chains = {N->getOperand(0)}; + while (!Chains.empty()) { + SDValue Chain = Chains.back(); + Chains.pop_back(); + if (!Chain.hasOneUse()) + continue; + switch (Chain.getOpcode()) { + case ISD::TokenFactor: + for (unsigned Nops = Chain.getNumOperands(); Nops;) + Chains.push_back(Chain.getOperand(--Nops)); + break; + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: + // We can forward past any lifetime start/end that can be proven not to + // alias the node. 
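+      // E.g. lifetime markers for a different, non-aliasing object do not
+      // block the search for a store made dead by this LIFETIME_END.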
+      if (!isAlias(Chain.getNode(), N))
+        Chains.push_back(Chain.getOperand(0));
+      break;
+    case ISD::STORE: {
+      // Chain's opcode is known to be ISD::STORE here, so cast<> is safe.
+      StoreSDNode *ST = cast<StoreSDNode>(Chain);
+      // TODO: Can relax for unordered atomics (see D66309)
+      if (!ST->isSimple() || ST->isIndexed())
+        continue;
+      const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
+      // If we store purely within object bounds just before its lifetime ends,
+      // we can remove the store.
+      if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
+                                   ST->getMemoryVT().getStoreSizeInBits())) {
+        LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
+                   dbgs() << "\nwithin LIFETIME_END of : ";
+                   LifetimeEndBase.dump(); dbgs() << "\n");
+        CombineTo(ST, ST->getChain());
+        return SDValue(N, 0);
+      }
+    }
+    }
+  }
+  return SDValue();
+}
+
+/// For the instruction sequence of store below, F and I values
+/// are bundled together as an i64 value before being stored into memory.
+/// Sometimes it is more efficient to generate separate stores for F and I,
+/// which can remove the bitwise instructions or sink them to colder places.
+///
+/// (store (or (zext (bitcast F to i32) to i64),
+///            (shl (zext I to i64), 32)), addr)  -->
+/// (store F, addr) and (store I, addr+4)
+///
+/// Similarly, splitting for other merged stores can also be beneficial, like:
+/// For pair of {i32, i32}, i64 store --> two i32 stores.
+/// For pair of {i32, i16}, i64 store --> two i32 stores.
+/// For pair of {i16, i16}, i32 store --> two i16 stores.
+/// For pair of {i16, i8},  i32 store --> two i16 stores.
+/// For pair of {i8, i8},   i16 store --> two i8 stores.
+///
+/// We allow each target to determine specifically which kind of splitting is
+/// supported.
+///
+/// The store patterns are commonly seen from the simple code snippet below
+/// if only std::make_pair(...) is SROA-transformed before being inlined into
+/// hoo:
+/// void goo(const std::pair<int, float> &);
+/// hoo() {
+///   ...
+///   goo(std::make_pair(tmp, ftmp));
+///   ...
+/// }
+///
+SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
+  if (OptLevel == CodeGenOpt::None)
+    return SDValue();
+
+  // Can't change the number of memory accesses for a volatile store or break
+  // atomicity for an atomic one.
+  if (!ST->isSimple())
+    return SDValue();
+
+  SDValue Val = ST->getValue();
+  SDLoc DL(ST);
+
+  // Match OR operand.
+  if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
+    return SDValue();
+
+  // Match SHL operand and get Lower and Higher parts of Val.
+  SDValue Op1 = Val.getOperand(0);
+  SDValue Op2 = Val.getOperand(1);
+  SDValue Lo, Hi;
+  if (Op1.getOpcode() != ISD::SHL) {
+    std::swap(Op1, Op2);
+    if (Op1.getOpcode() != ISD::SHL)
+      return SDValue();
+  }
+  Lo = Op2;
+  Hi = Op1.getOperand(0);
+  if (!Op1.hasOneUse())
+    return SDValue();
+
+  // Match shift amount to HalfValBitSize.
+  unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
+  ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
+  if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
+    return SDValue();
+
+  // Lo and Hi must be zero-extended from integers no wider than half of
+  // Val's width (HalfValBitSize).
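+  // E.g. for an i64 store, HalfValBitSize is 32, so Lo and Hi must each be
+  // zero-extended from i32 or a narrower integer type.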
+ if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() || + !Lo.getOperand(0).getValueType().isScalarInteger() || + Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize || + Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() || + !Hi.getOperand(0).getValueType().isScalarInteger() || + Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize) + return SDValue(); + + // Use the EVT of low and high parts before bitcast as the input + // of target query. + EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST) + ? Lo.getOperand(0).getValueType() + : Lo.getValueType(); + EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST) + ? Hi.getOperand(0).getValueType() + : Hi.getValueType(); + if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) + return SDValue(); + + // Start to split store. + unsigned Alignment = ST->getAlignment(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); + AAMDNodes AAInfo = ST->getAAInfo(); + + // Change the sizes of Lo and Hi's value types to HalfValBitSize. + EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize); + Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0)); + Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0)); + + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + // Lower value store. + SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), + ST->getAlignment(), MMOFlags, AAInfo); + Ptr = + DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType())); + // Higher value store. + SDValue St1 = + DAG.getStore(St0, DL, Hi, Ptr, + ST->getPointerInfo().getWithOffset(HalfValBitSize / 8), + Alignment / 2, MMOFlags, AAInfo); + return St1; +} + +/// Convert a disguised subvector insertion into a shuffle: +SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { + SDValue InsertVal = N->getOperand(1); + SDValue Vec = N->getOperand(0); + + // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex) + // --> (vector_shuffle X, Y) + if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() && + InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(InsertVal.getOperand(1))) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode()); + ArrayRef<int> Mask = SVN->getMask(); + + SDValue X = Vec.getOperand(0); + SDValue Y = Vec.getOperand(1); + + // Vec's operand 0 is using indices from 0 to N-1 and + // operand 1 from N to 2N - 1, where N is the number of + // elements in the vectors. + int XOffset = -1; + if (InsertVal.getOperand(0) == X) { + XOffset = 0; + } else if (InsertVal.getOperand(0) == Y) { + XOffset = X.getValueType().getVectorNumElements(); + } + + if (XOffset != -1) { + SmallVector<int, 16> NewMask(Mask.begin(), Mask.end()); + + auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1)); + NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue(); + assert(NewMask[InsIndex] < + (int)(2 * Vec.getValueType().getVectorNumElements()) && + NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound"); + + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X, + Y, NewMask, DAG); + if (LegalShuffle) + return LegalShuffle; + } + } + + // insert_vector_elt V, (bitcast X from vector type), IdxC --> + // bitcast(shuffle (bitcast V), (extended X), Mask) + // Note: We do not use an insert_subvector node because that requires a + // legal subvector type. 
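+  // An illustrative instance of the fold implemented below:
+  //   (insert_vector_elt v2i64 V, (i64 bitcast (v2f32 X)), 0)
+  //   --> (v2i64 bitcast (shuffle (v4f32 bitcast V),
+  //                               (concat_vectors X, undef), {4,5,2,3}))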
+ if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() || + !InsertVal.getOperand(0).getValueType().isVector()) + return SDValue(); + + SDValue SubVec = InsertVal.getOperand(0); + SDValue DestVec = N->getOperand(0); + EVT SubVecVT = SubVec.getValueType(); + EVT VT = DestVec.getValueType(); + unsigned NumSrcElts = SubVecVT.getVectorNumElements(); + unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits(); + unsigned NumMaskVals = ExtendRatio * NumSrcElts; + + // Step 1: Create a shuffle mask that implements this insert operation. The + // vector that we are inserting into will be operand 0 of the shuffle, so + // those elements are just 'i'. The inserted subvector is in the first + // positions of operand 1 of the shuffle. Example: + // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7} + SmallVector<int, 16> Mask(NumMaskVals); + for (unsigned i = 0; i != NumMaskVals; ++i) { + if (i / NumSrcElts == InsIndex) + Mask[i] = (i % NumSrcElts) + NumMaskVals; + else + Mask[i] = i; + } + + // Bail out if the target can not handle the shuffle we want to create. + EVT SubVecEltVT = SubVecVT.getVectorElementType(); + EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals); + if (!TLI.isShuffleMaskLegal(Mask, ShufVT)) + return SDValue(); + + // Step 2: Create a wide vector from the inserted source vector by appending + // undefined elements. This is the same size as our destination vector. + SDLoc DL(N); + SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT)); + ConcatOps[0] = SubVec; + SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps); + + // Step 3: Shuffle in the padded subvector. + SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec); + SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask); + AddToWorklist(PaddedSubV.getNode()); + AddToWorklist(DestVecBC.getNode()); + AddToWorklist(Shuf.getNode()); + return DAG.getBitcast(VT, Shuf); +} + +SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { + SDValue InVec = N->getOperand(0); + SDValue InVal = N->getOperand(1); + SDValue EltNo = N->getOperand(2); + SDLoc DL(N); + + // If the inserted element is an UNDEF, just use the input vector. + if (InVal.isUndef()) + return InVec; + + EVT VT = InVec.getValueType(); + unsigned NumElts = VT.getVectorNumElements(); + + // Remove redundant insertions: + // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x + if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1)) + return InVec; + + auto *IndexC = dyn_cast<ConstantSDNode>(EltNo); + if (!IndexC) { + // If this is variable insert to undef vector, it might be better to splat: + // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... > + if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) { + SmallVector<SDValue, 8> Ops(NumElts, InVal); + return DAG.getBuildVector(VT, DL, Ops); + } + return SDValue(); + } + + // We must know which element is being inserted for folds below here. + unsigned Elt = IndexC->getZExtValue(); + if (SDValue Shuf = combineInsertEltToShuffle(N, Elt)) + return Shuf; + + // Canonicalize insert_vector_elt dag nodes. + // Example: + // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1) + // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0) + // + // Do this only if the child insert_vector node has one use; also + // do this only if indices are both constants and Idx1 < Idx0. 
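+  // E.g. (insert_vector_elt (insert_vector_elt A, x, 3), y, 1)
+  //   -> (insert_vector_elt (insert_vector_elt A, y, 1), x, 3)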
+ if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse() + && isa<ConstantSDNode>(InVec.getOperand(2))) { + unsigned OtherElt = InVec.getConstantOperandVal(2); + if (Elt < OtherElt) { + // Swap nodes. + SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, + InVec.getOperand(0), InVal, EltNo); + AddToWorklist(NewOp.getNode()); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()), + VT, NewOp, InVec.getOperand(1), InVec.getOperand(2)); + } + } + + // If we can't generate a legal BUILD_VECTOR, exit + if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) + return SDValue(); + + // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially + // be converted to a BUILD_VECTOR). Fill in the Ops vector with the + // vector elements. + SmallVector<SDValue, 8> Ops; + // Do not combine these two vectors if the output vector will not replace + // the input vector. + if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { + Ops.append(InVec.getNode()->op_begin(), + InVec.getNode()->op_end()); + } else if (InVec.isUndef()) { + Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType())); + } else { + return SDValue(); + } + assert(Ops.size() == NumElts && "Unexpected vector size"); + + // Insert the element + if (Elt < Ops.size()) { + // All the operands of BUILD_VECTOR must have the same type; + // we enforce that here. + EVT OpVT = Ops[0].getValueType(); + Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal; + } + + // Return the new vector + return DAG.getBuildVector(VT, DL, Ops); +} + +SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, + SDValue EltNo, + LoadSDNode *OriginalLoad) { + assert(OriginalLoad->isSimple()); + + EVT ResultVT = EVE->getValueType(0); + EVT VecEltVT = InVecVT.getVectorElementType(); + unsigned Align = OriginalLoad->getAlignment(); + unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( + VecEltVT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) + return SDValue(); + + ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ? + ISD::NON_EXTLOAD : ISD::EXTLOAD; + if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT)) + return SDValue(); + + Align = NewAlign; + + SDValue NewPtr = OriginalLoad->getBasePtr(); + SDValue Offset; + EVT PtrType = NewPtr.getValueType(); + MachinePointerInfo MPI; + SDLoc DL(EVE); + if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) { + int Elt = ConstEltNo->getZExtValue(); + unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; + Offset = DAG.getConstant(PtrOff, DL, PtrType); + MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); + } else { + Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType); + Offset = DAG.getNode( + ISD::MUL, DL, PtrType, Offset, + DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType)); + // Discard the pointer info except the address space because the memory + // operand can't represent this new access since the offset is variable. + MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace()); + } + NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset); + + // The replacement we need to do here is a little tricky: we need to + // replace an extractelement of a load with a load. + // Use ReplaceAllUsesOfValuesWith to do the replacement. 
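+  // Both results are rewired at once: the extract's users get the new
+  // (possibly extended) load's value, and the original load's chain users
+  // get the new load's chain.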
+ // Note that this replacement assumes that the extractvalue is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. + SDValue Load; + SDValue Chain; + if (ResultVT.bitsGT(VecEltVT)) { + // If the result type of vextract is wider than the load, then issue an + // extending load instead. + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, + VecEltVT) + ? ISD::ZEXTLOAD + : ISD::EXTLOAD; + Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, + OriginalLoad->getChain(), NewPtr, MPI, VecEltVT, + Align, OriginalLoad->getMemOperand()->getFlags(), + OriginalLoad->getAAInfo()); + Chain = Load.getValue(1); + } else { + Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, + MPI, Align, OriginalLoad->getMemOperand()->getFlags(), + OriginalLoad->getAAInfo()); + Chain = Load.getValue(1); + if (ResultVT.bitsLT(VecEltVT)) + Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); + else + Load = DAG.getBitcast(ResultVT, Load); + } + WorklistRemover DeadNodes(*this); + SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; + SDValue To[] = { Load, Chain }; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + // Make sure to revisit this node to clean it up; it will usually be dead. + AddToWorklist(EVE); + // Since we're explicitly calling ReplaceAllUses, add the new node to the + // worklist explicitly as well. + AddUsersToWorklist(Load.getNode()); // Add users too + AddToWorklist(Load.getNode()); + ++OpsNarrowed; + return SDValue(EVE, 0); +} + +/// Transform a vector binary operation into a scalar binary operation by moving +/// the math/logic after an extract element of a vector. +static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, + bool LegalOperations) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Vec = ExtElt->getOperand(0); + SDValue Index = ExtElt->getOperand(1); + auto *IndexC = dyn_cast<ConstantSDNode>(Index); + if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() || + Vec.getNode()->getNumValues() != 1) + return SDValue(); + + // Targets may want to avoid this to prevent an expensive register transfer. + if (!TLI.shouldScalarizeBinop(Vec)) + return SDValue(); + + // Extracting an element of a vector constant is constant-folded, so this + // transform is just replacing a vector op with a scalar op while moving the + // extract. + SDValue Op0 = Vec.getOperand(0); + SDValue Op1 = Vec.getOperand(1); + if (isAnyConstantBuildVector(Op0, true) || + isAnyConstantBuildVector(Op1, true)) { + // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C' + // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC) + SDLoc DL(ExtElt); + EVT VT = ExtElt->getValueType(0); + SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index); + SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index); + return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { + SDValue VecOp = N->getOperand(0); + SDValue Index = N->getOperand(1); + EVT ScalarVT = N->getValueType(0); + EVT VecVT = VecOp.getValueType(); + if (VecOp.isUndef()) + return DAG.getUNDEF(ScalarVT); + + // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val + // + // This only really matters if the index is non-constant since other combines + // on the constant elements already work. 
+  SDLoc DL(N);
+  if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
+      Index == VecOp.getOperand(2)) {
+    SDValue Elt = VecOp.getOperand(1);
+    return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
+  }
+
+  // (vextract (scalar_to_vector val), 0) -> val
+  if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+    // Check if the result type doesn't match the inserted element type. A
+    // SCALAR_TO_VECTOR may truncate the inserted element and the
+    // EXTRACT_VECTOR_ELT may widen the extracted vector.
+    SDValue InOp = VecOp.getOperand(0);
+    if (InOp.getValueType() != ScalarVT) {
+      assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
+      return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
+    }
+    return InOp;
+  }
+
+  // extract_vector_elt of out-of-bounds element -> UNDEF
+  auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+  unsigned NumElts = VecVT.getVectorNumElements();
+  if (IndexC && IndexC->getAPIntValue().uge(NumElts))
+    return DAG.getUNDEF(ScalarVT);
+
+  // extract_vector_elt (build_vector x, y), 1 -> y
+  if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
+      TLI.isTypeLegal(VecVT) &&
+      (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
+    SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
+    EVT InEltVT = Elt.getValueType();
+
+    // Sometimes build_vector's scalar input types do not match result type.
+    if (ScalarVT == InEltVT)
+      return Elt;
+
+    // TODO: It may be useful to truncate if free if the build_vector
+    // implicitly converts.
+  }
+
+  // TODO: These transforms should not require the 'hasOneUse' restriction, but
+  // there are regressions on multiple targets without it. We can end up with a
+  // mess of scalar and vector code if we reduce only part of the DAG to
+  // scalar.
+  if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
+      VecOp.hasOneUse()) {
+    // The vector index of the LSBs of the source depends on the endianness.
+    bool IsLE = DAG.getDataLayout().isLittleEndian();
+    unsigned ExtractIndex = IndexC->getZExtValue();
+    // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
+    unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
+    SDValue BCSrc = VecOp.getOperand(0);
+    if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
+      return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
+
+    if (LegalTypes && BCSrc.getValueType().isInteger() &&
+        BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+      // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt
+      //   --> trunc i64 X to i32
+      SDValue X = BCSrc.getOperand(0);
+      assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
+             "Extract element and scalar to vector can't change element type "
+             "from FP to integer.");
+      unsigned XBitWidth = X.getValueSizeInBits();
+      unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
+      BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
+
+      // An extract element return value type can be wider than its vector
+      // operand element type. In that case, the high bits are undefined, so
+      // it's possible that we may need to extend rather than truncate.
+      if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
+        assert(XBitWidth % VecEltBitWidth == 0 &&
+               "Scalar bitwidth must be a multiple of vector element bitwidth");
+        return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
+      }
+    }
+  }
+
+  if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
+    return BO;
+
+  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
+ // We only perform this optimization before the op legalization phase because + // we may introduce new vector instructions which are not backed by TD + // patterns. For example on AVX, extracting elements from a wide vector + // without using extract_subvector. However, if we can find an underlying + // scalar value, then we can always use that. + if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) { + auto *Shuf = cast<ShuffleVectorSDNode>(VecOp); + // Find the new index to extract from. + int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue()); + + // Extracting an undef index is undef. + if (OrigElt == -1) + return DAG.getUNDEF(ScalarVT); + + // Select the right vector half to extract from. + SDValue SVInVec; + if (OrigElt < (int)NumElts) { + SVInVec = VecOp.getOperand(0); + } else { + SVInVec = VecOp.getOperand(1); + OrigElt -= NumElts; + } + + if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { + SDValue InOp = SVInVec.getOperand(OrigElt); + if (InOp.getValueType() != ScalarVT) { + assert(InOp.getValueType().isInteger() && ScalarVT.isInteger()); + InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT); + } + + return InOp; + } + + // FIXME: We should handle recursing on other vector shuffles and + // scalar_to_vector here as well. + + if (!LegalOperations || + // FIXME: Should really be just isOperationLegalOrCustom. + TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) || + TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) { + EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec, + DAG.getConstant(OrigElt, DL, IndexTy)); + } + } + + // If only EXTRACT_VECTOR_ELT nodes use the source vector we can + // simplify it based on the (valid) extraction indices. + if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) { + return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Use->getOperand(0) == VecOp && + isa<ConstantSDNode>(Use->getOperand(1)); + })) { + APInt DemandedElts = APInt::getNullValue(NumElts); + for (SDNode *Use : VecOp->uses()) { + auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1)); + if (CstElt->getAPIntValue().ult(NumElts)) + DemandedElts.setBit(CstElt->getZExtValue()); + } + if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) { + // We simplified the vector operand of this extract element. If this + // extract is not dead, visit it again so it is folded properly. + if (N->getOpcode() != ISD::DELETED_NODE) + AddToWorklist(N); + return SDValue(N, 0); + } + } + + // Everything under here is trying to match an extract of a loaded value. + // If the result of load has to be truncated, then it's not necessarily + // profitable. + bool BCNumEltsChanged = false; + EVT ExtVT = VecVT.getVectorElementType(); + EVT LVT = ExtVT; + if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT)) + return SDValue(); + + if (VecOp.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. 
+ if (!VecOp.hasOneUse()) + return SDValue(); + + EVT BCVT = VecOp.getOperand(0).getValueType(); + if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) + return SDValue(); + if (NumElts != BCVT.getVectorNumElements()) + BCNumEltsChanged = true; + VecOp = VecOp.getOperand(0); + ExtVT = BCVT.getVectorElementType(); + } + + // extract (vector load $addr), i --> load $addr + i * size + if (!LegalOperations && !IndexC && VecOp.hasOneUse() && + ISD::isNormalLoad(VecOp.getNode()) && + !Index->hasPredecessor(VecOp.getNode())) { + auto *VecLoad = dyn_cast<LoadSDNode>(VecOp); + if (VecLoad && VecLoad->isSimple()) + return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad); + } + + // Perform only after legalization to ensure build_vector / vector_shuffle + // optimizations have already been done. + if (!LegalOperations || !IndexC) + return SDValue(); + + // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) + int Elt = IndexC->getZExtValue(); + LoadSDNode *LN0 = nullptr; + if (ISD::isNormalLoad(VecOp.getNode())) { + LN0 = cast<LoadSDNode>(VecOp); + } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR && + VecOp.getOperand(0).getValueType() == ExtVT && + ISD::isNormalLoad(VecOp.getOperand(0).getNode())) { + // Don't duplicate a load with other uses. + if (!VecOp.hasOneUse()) + return SDValue(); + + LN0 = cast<LoadSDNode>(VecOp.getOperand(0)); + } + if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) { + // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) + // => + // (load $addr+1*size) + + // Don't duplicate a load with other uses. + if (!VecOp.hasOneUse()) + return SDValue(); + + // If the bit convert changed the number of elements, it is unsafe + // to examine the mask. + if (BCNumEltsChanged) + return SDValue(); + + // Select the input vector, guarding against out of range extract vector. + int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt); + VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1); + + if (VecOp.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!VecOp.hasOneUse()) + return SDValue(); + + VecOp = VecOp.getOperand(0); + } + if (ISD::isNormalLoad(VecOp.getNode())) { + LN0 = cast<LoadSDNode>(VecOp); + Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts; + Index = DAG.getConstant(Elt, DL, Index.getValueType()); + } + } + + // Make sure we found a non-volatile load and the extractelement is + // the only use. + if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple()) + return SDValue(); + + // If Idx was -1 above, Elt is going to be -1, so just return undef. + if (Elt == -1) + return DAG.getUNDEF(LVT); + + return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0); +} + +// Simplify (build_vec (ext )) to (bitcast (build_vec )) +SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { + // We perform this optimization post type-legalization because + // the type-legalizer often scalarizes integer-promoted vectors. + // Performing this optimization before may create bit-casts which + // will be type-legalized to complex code sequences. + // We perform this optimization only before the operation legalizer because we + // may introduce illegal operations. 
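+  // For example, on a little-endian target:
+  //   (v4i32 build_vector (zext i16 a), (zext i16 b),
+  //                       (zext i16 c), (zext i16 d))
+  //   --> (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0))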
+  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
+    return SDValue();
+
+  unsigned NumInScalars = N->getNumOperands();
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  // Check to see if this is a BUILD_VECTOR of a bunch of values
+  // which come from any_extend or zero_extend nodes. If so, we can create
+  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
+  // optimizations. We do not handle sign-extend because we can't fill the sign
+  // using shuffles.
+  EVT SourceType = MVT::Other;
+  bool AllAnyExt = true;
+
+  for (unsigned i = 0; i != NumInScalars; ++i) {
+    SDValue In = N->getOperand(i);
+    // Ignore undef inputs.
+    if (In.isUndef()) continue;
+
+    bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
+    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
+
+    // Abort if the element is not an extension.
+    if (!ZeroExt && !AnyExt) {
+      SourceType = MVT::Other;
+      break;
+    }
+
+    // The input is a ZeroExt or AnyExt. Check the original type.
+    EVT InTy = In.getOperand(0).getValueType();
+
+    // Check that all of the widened source types are the same.
+    if (SourceType == MVT::Other)
+      // First time.
+      SourceType = InTy;
+    else if (InTy != SourceType) {
+      // Multiple incoming types. Abort.
+      SourceType = MVT::Other;
+      break;
+    }
+
+    // Check if all of the extends are ANY_EXTENDs.
+    AllAnyExt &= AnyExt;
+  }
+
+  // In order to have valid types, all of the inputs must be extended from the
+  // same source type and all of the inputs must be any or zero extend.
+  // Scalar sizes must be a power of two.
+  EVT OutScalarTy = VT.getScalarType();
+  bool ValidTypes = SourceType != MVT::Other &&
+                    isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
+                    isPowerOf2_32(SourceType.getSizeInBits());
+
+  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
+  // turn into a single shuffle instruction.
+  if (!ValidTypes)
+    return SDValue();
+
+  bool isLE = DAG.getDataLayout().isLittleEndian();
+  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
+  assert(ElemRatio > 1 && "Invalid element size ratio");
+  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
+                               DAG.getConstant(0, DL, SourceType);
+
+  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
+  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
+
+  // Populate the new build_vector
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    SDValue Cast = N->getOperand(i);
+    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
+            Cast.getOpcode() == ISD::ZERO_EXTEND ||
+            Cast.isUndef()) && "Invalid cast opcode");
+    SDValue In;
+    if (Cast.isUndef())
+      In = DAG.getUNDEF(SourceType);
+    else
+      In = Cast->getOperand(0);
+    unsigned Index = isLE ? (i * ElemRatio) :
+                            (i * ElemRatio + (ElemRatio - 1));
+
+    assert(Index < Ops.size() && "Invalid index");
+    Ops[Index] = In;
+  }
+
+  // The type of the new BUILD_VECTOR node.
+  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
+  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
+         "Invalid vector size");
+  // Check if the new vector type is legal.
+  if (!isTypeLegal(VecVT) ||
+      (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
+       TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
+    return SDValue();
+
+  // Make the new BUILD_VECTOR.
+  SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
+
+  // The new BUILD_VECTOR node has the potential to be further optimized.
+  AddToWorklist(BV.getNode());
+  // Bitcast to the desired type.
+ return DAG.getBitcast(VT, BV); +} + +SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, + ArrayRef<int> VectorMask, + SDValue VecIn1, SDValue VecIn2, + unsigned LeftIdx, bool DidSplitVec) { + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy); + + EVT VT = N->getValueType(0); + EVT InVT1 = VecIn1.getValueType(); + EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1; + + unsigned NumElems = VT.getVectorNumElements(); + unsigned ShuffleNumElems = NumElems; + + // If we artificially split a vector in two already, then the offsets in the + // operands will all be based off of VecIn1, even those in VecIn2. + unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements(); + + // We can't generate a shuffle node with mismatched input and output types. + // Try to make the types match the type of the output. + if (InVT1 != VT || InVT2 != VT) { + if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) { + // If the output vector length is a multiple of both input lengths, + // we can concatenate them and pad the rest with undefs. + unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits(); + assert(NumConcats >= 2 && "Concat needs at least two inputs!"); + SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1)); + ConcatOps[0] = VecIn1; + ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1); + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); + VecIn2 = SDValue(); + } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) { + if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems)) + return SDValue(); + + if (!VecIn2.getNode()) { + // If we only have one input vector, and it's twice the size of the + // output, split it in two. + VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, + DAG.getConstant(NumElems, DL, IdxTy)); + VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx); + // Since we now have shorter input vectors, adjust the offset of the + // second vector's start. + Vec2Offset = NumElems; + } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) { + // VecIn1 is wider than the output, and we have another, possibly + // smaller input. Pad the smaller input with undefs, shuffle at the + // input vector width, and extract the output. + // The shuffle type is different than VT, so check legality again. + if (LegalOperations && + !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1)) + return SDValue(); + + // Legalizing INSERT_SUBVECTOR is tricky - you basically have to + // lower it back into a BUILD_VECTOR. So if the inserted type is + // illegal, don't even try. + if (InVT1 != InVT2) { + if (!TLI.isTypeLegal(InVT2)) + return SDValue(); + VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1, + DAG.getUNDEF(InVT1), VecIn2, ZeroIdx); + } + ShuffleNumElems = NumElems * 2; + } else { + // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider + // than VecIn1. We can't handle this for now - this case will disappear + // when we start sorting the vectors by type. + return SDValue(); + } + } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() && + InVT1.getSizeInBits() == VT.getSizeInBits()) { + SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2)); + ConcatOps[0] = VecIn2; + VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); + } else { + // TODO: Support cases where the length mismatch isn't exactly by a + // factor of 2. 
+ // TODO: Move this check upwards, so that if we have bad type + // mismatches, we don't create any DAG nodes. + return SDValue(); + } + } + + // Initialize mask to undef. + SmallVector<int, 8> Mask(ShuffleNumElems, -1); + + // Only need to run up to the number of elements actually used, not the + // total number of elements in the shuffle - if we are shuffling a wider + // vector, the high lanes should be set to undef. + for (unsigned i = 0; i != NumElems; ++i) { + if (VectorMask[i] <= 0) + continue; + + unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1); + if (VectorMask[i] == (int)LeftIdx) { + Mask[i] = ExtIndex; + } else if (VectorMask[i] == (int)LeftIdx + 1) { + Mask[i] = Vec2Offset + ExtIndex; + } + } + + // The type the input vectors may have changed above. + InVT1 = VecIn1.getValueType(); + + // If we already have a VecIn2, it should have the same type as VecIn1. + // If we don't, get an undef/zero vector of the appropriate type. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1); + assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type."); + + SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask); + if (ShuffleNumElems > NumElems) + Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx); + + return Shuffle; +} + +static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { + assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector"); + + // First, determine where the build vector is not undef. + // TODO: We could extend this to handle zero elements as well as undefs. + int NumBVOps = BV->getNumOperands(); + int ZextElt = -1; + for (int i = 0; i != NumBVOps; ++i) { + SDValue Op = BV->getOperand(i); + if (Op.isUndef()) + continue; + if (ZextElt == -1) + ZextElt = i; + else + return SDValue(); + } + // Bail out if there's no non-undef element. + if (ZextElt == -1) + return SDValue(); + + // The build vector contains some number of undef elements and exactly + // one other element. That other element must be a zero-extended scalar + // extracted from a vector at a constant index to turn this into a shuffle. + // Also, require that the build vector does not implicitly truncate/extend + // its elements. + // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND. + EVT VT = BV->getValueType(0); + SDValue Zext = BV->getOperand(ZextElt); + if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() || + Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) || + Zext.getValueSizeInBits() != VT.getScalarSizeInBits()) + return SDValue(); + + // The zero-extend must be a multiple of the source size, and we must be + // building a vector of the same size as the source of the extract element. + SDValue Extract = Zext.getOperand(0); + unsigned DestSize = Zext.getValueSizeInBits(); + unsigned SrcSize = Extract.getValueSizeInBits(); + if (DestSize % SrcSize != 0 || + Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits()) + return SDValue(); + + // Create a shuffle mask that will combine the extracted element with zeros + // and undefs. + int ZextRatio = DestSize / SrcSize; + int NumMaskElts = NumBVOps * ZextRatio; + SmallVector<int, 32> ShufMask(NumMaskElts, -1); + for (int i = 0; i != NumMaskElts; ++i) { + if (i / ZextRatio == ZextElt) { + // The low bits of the (potentially translated) extracted element map to + // the source vector. The high bits map to zero. 
We will use a zero vector + // as the 2nd source operand of the shuffle, so use the 1st element of + // that vector (mask value is number-of-elements) for the high bits. + if (i % ZextRatio == 0) + ShufMask[i] = Extract.getConstantOperandVal(1); + else + ShufMask[i] = NumMaskElts; + } + + // Undef elements of the build vector remain undef because we initialize + // the shuffle mask with -1. + } + + // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... --> + // bitcast (shuffle V, ZeroVec, VectorMask) + SDLoc DL(BV); + EVT VecVT = Extract.getOperand(0).getValueType(); + SDValue ZeroVec = DAG.getConstant(0, DL, VecVT); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0), + ZeroVec, ShufMask, DAG); + if (!Shuf) + return SDValue(); + return DAG.getBitcast(VT, Shuf); +} + +// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT +// operations. If the types of the vectors we're extracting from allow it, +// turn this into a vector_shuffle node. +SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. + if (!isTypeLegal(VT)) + return SDValue(); + + if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG)) + return V; + + // May only combine to shuffle after legalize if shuffle is legal. + if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) + return SDValue(); + + bool UsesZeroVector = false; + unsigned NumElems = N->getNumOperands(); + + // Record, for each element of the newly built vector, which input vector + // that element comes from. -1 stands for undef, 0 for the zero vector, + // and positive values for the input vectors. + // VectorMask maps each element to its vector number, and VecIn maps vector + // numbers to their initial SDValues. + + SmallVector<int, 8> VectorMask(NumElems, -1); + SmallVector<SDValue, 8> VecIn; + VecIn.push_back(SDValue()); + + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Op = N->getOperand(i); + + if (Op.isUndef()) + continue; + + // See if we can use a blend with a zero vector. + // TODO: Should we generalize this to a blend with an arbitrary constant + // vector? + if (isNullConstant(Op) || isNullFPConstant(Op)) { + UsesZeroVector = true; + VectorMask[i] = 0; + continue; + } + + // Not an undef or zero. If the input is something other than an + // EXTRACT_VECTOR_ELT with an in-range constant index, bail out. + if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(Op.getOperand(1))) + return SDValue(); + SDValue ExtractedFromVec = Op.getOperand(0); + + const APInt &ExtractIdx = Op.getConstantOperandAPInt(1); + if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements())) + return SDValue(); + + // All inputs must have the same element type as the output. + if (VT.getVectorElementType() != + ExtractedFromVec.getValueType().getVectorElementType()) + return SDValue(); + + // Have we seen this input vector before? + // The vectors are expected to be tiny (usually 1 or 2 elements), so using + // a map back from SDValues to numbers isn't worth it. + unsigned Idx = std::distance( + VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec)); + if (Idx == VecIn.size()) + VecIn.push_back(ExtractedFromVec); + + VectorMask[i] = Idx; + } + + // If we didn't find at least one input vector, bail out. 
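+  // (VecIn[0] is a null placeholder standing for the undef/zero input, so
+  // fewer than two entries means no real input vector was found.)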
+  if (VecIn.size() < 2)
+    return SDValue();
+
+  // If all the operands of the BUILD_VECTOR extract from the same vector,
+  // split that vector based on the maximum vector access index and adjust
+  // VectorMask and VecIn accordingly.
+  bool DidSplitVec = false;
+  if (VecIn.size() == 2) {
+    unsigned MaxIndex = 0;
+    unsigned NearestPow2 = 0;
+    SDValue Vec = VecIn.back();
+    EVT InVT = Vec.getValueType();
+    MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+    SmallVector<unsigned, 8> IndexVec(NumElems, 0);
+
+    for (unsigned i = 0; i < NumElems; i++) {
+      if (VectorMask[i] <= 0)
+        continue;
+      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
+      IndexVec[i] = Index;
+      MaxIndex = std::max(MaxIndex, Index);
+    }
+
+    NearestPow2 = PowerOf2Ceil(MaxIndex);
+    if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
+        NumElems * 2 < NearestPow2) {
+      unsigned SplitSize = NearestPow2 / 2;
+      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
+                                     InVT.getVectorElementType(), SplitSize);
+      if (TLI.isTypeLegal(SplitVT)) {
+        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
+                                     DAG.getConstant(SplitSize, DL, IdxTy));
+        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
+                                     DAG.getConstant(0, DL, IdxTy));
+        VecIn.pop_back();
+        VecIn.push_back(VecIn1);
+        VecIn.push_back(VecIn2);
+        DidSplitVec = true;
+
+        for (unsigned i = 0; i < NumElems; i++) {
+          if (VectorMask[i] <= 0)
+            continue;
+          VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
+        }
+      }
+    }
+  }
+
+  // TODO: We want to sort the vectors by descending length, so that adjacent
+  // pairs have similar length, and the longer vector is always first in the
+  // pair.
+
+  // TODO: Should this fire if some of the input vectors have an illegal type
+  // (like it does now), or should we let legalization run its course first?
+
+  // Shuffle phase:
+  // Take pairs of vectors, and shuffle them so that the result has elements
+  // from these vectors in the correct places.
+  // For example, given:
+  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
+  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
+  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
+  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
+  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
+  // We will generate:
+  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
+  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
+  SmallVector<SDValue, 4> Shuffles;
+  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
+    unsigned LeftIdx = 2 * In + 1;
+    SDValue VecLeft = VecIn[LeftIdx];
+    SDValue VecRight =
+        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
+
+    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
+                                                VecRight, LeftIdx, DidSplitVec))
+      Shuffles.push_back(Shuffle);
+    else
+      return SDValue();
+  }
+
+  // If we need the zero vector as an "ingredient" in the blend tree, add it
+  // to the list of shuffles.
+  if (UsesZeroVector)
+    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
+                                      : DAG.getConstantFP(0.0, DL, VT));
+
+  // If we only have one shuffle, we're done.
+  if (Shuffles.size() == 1)
+    return Shuffles[0];
+
+  // Update the vector mask to point to the post-shuffle vectors.
+  for (int &Vec : VectorMask)
+    if (Vec == 0)
+      Vec = Shuffles.size() - 1;
+    else
+      Vec = (Vec - 1) / 2;
+
+  // More than one shuffle. Generate a binary tree of blends, e.g. if from
+  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
+  // generate:
+  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
+  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
+  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
+  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
+  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
+  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
+  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
+
+  // Make sure the initial size of the shuffle list is even.
+  if (Shuffles.size() % 2)
+    Shuffles.push_back(DAG.getUNDEF(VT));
+
+  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
+    if (CurSize % 2) {
+      Shuffles[CurSize] = DAG.getUNDEF(VT);
+      CurSize++;
+    }
+    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
+      int Left = 2 * In;
+      int Right = 2 * In + 1;
+      SmallVector<int, 8> Mask(NumElems, -1);
+      for (unsigned i = 0; i != NumElems; ++i) {
+        if (VectorMask[i] == Left) {
+          Mask[i] = i;
+          VectorMask[i] = In;
+        } else if (VectorMask[i] == Right) {
+          Mask[i] = i + NumElems;
+          VectorMask[i] = In;
+        }
+      }
+
+      Shuffles[In] =
+          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
+    }
+  }
+  return Shuffles[0];
+}
+
+// Try to turn a build vector of zero extends of extract vector elts into a
+// vector zero extend and possibly an extract subvector.
+// TODO: Support sign extend?
+// TODO: Allow undef elements?
+SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
+  if (LegalOperations)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+
+  bool FoundZeroExtend = false;
+  SDValue Op0 = N->getOperand(0);
+  auto checkElem = [&](SDValue Op) -> int64_t {
+    unsigned Opc = Op.getOpcode();
+    FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
+    if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
+        Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+        Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
+      if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
+        return C->getZExtValue();
+    return -1;
+  };
+
+  // Make sure the first element matches
+  // (zext (extract_vector_elt X, C))
+  int64_t Offset = checkElem(Op0);
+  if (Offset < 0)
+    return SDValue();
+
+  unsigned NumElems = N->getNumOperands();
+  SDValue In = Op0.getOperand(0).getOperand(0);
+  EVT InSVT = In.getValueType().getScalarType();
+  EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
+
+  // Don't create an illegal input type after type legalization.
+  if (LegalTypes && !TLI.isTypeLegal(InVT))
+    return SDValue();
+
+  // Ensure all the elements come from the same vector and are adjacent.
+  for (unsigned i = 1; i != NumElems; ++i) {
+    if ((Offset + i) != checkElem(N->getOperand(i)))
+      return SDValue();
+  }
+
+  SDLoc DL(N);
+  In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
+                   Op0.getOperand(0).getOperand(1));
+  return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
+                     VT, In);
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+  EVT VT = N->getValueType(0);
+
+  // A vector built entirely of undefs is undef.
+  if (ISD::allOperandsUndef(N))
+    return DAG.getUNDEF(VT);
+
+  // If this is a splat of a bitcast from another vector, change to a
+  // concat_vector.
+  // For example:
+  // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
+  //   (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
+  //
+  // If X is a build_vector itself, the concat can become a larger build_vector.
+ // TODO: Maybe this is useful for non-splat too? + if (!LegalOperations) { + if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) { + Splat = peekThroughBitcasts(Splat); + EVT SrcVT = Splat.getValueType(); + if (SrcVT.isVector()) { + unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements(); + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), + SrcVT.getVectorElementType(), NumElts); + if (!LegalTypes || TLI.isTypeLegal(NewVT)) { + SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), + NewVT, Ops); + return DAG.getBitcast(VT, Concat); + } + } + } + } + + // A splat of a single element is a SPLAT_VECTOR if supported on the target. + if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand) + if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) { + assert(!V.isUndef() && "Splat of undef should have been handled earlier"); + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V); + } + + // Check if we can express BUILD VECTOR via subvector extract. + if (!LegalTypes && (N->getNumOperands() > 1)) { + SDValue Op0 = N->getOperand(0); + auto checkElem = [&](SDValue Op) -> uint64_t { + if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) && + (Op0.getOperand(0) == Op.getOperand(0))) + if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return CNode->getZExtValue(); + return -1; + }; + + int Offset = checkElem(Op0); + for (unsigned i = 0; i < N->getNumOperands(); ++i) { + if (Offset + i != checkElem(N->getOperand(i))) { + Offset = -1; + break; + } + } + + if ((Offset == 0) && + (Op0.getOperand(0).getValueType() == N->getValueType(0))) + return Op0.getOperand(0); + if ((Offset != -1) && + ((Offset % N->getValueType(0).getVectorNumElements()) == + 0)) // IDX must be multiple of output size. + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0), + Op0.getOperand(0), Op0.getOperand(1)); + } + + if (SDValue V = convertBuildVecZextToZext(N)) + return V; + + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) + return V; + + if (SDValue V = reduceBuildVecToShuffle(N)) + return V; + + return SDValue(); +} + +static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT OpVT = N->getOperand(0).getValueType(); + + // If the operands are legal vectors, leave them alone. + if (TLI.isTypeLegal(OpVT)) + return SDValue(); + + SDLoc DL(N); + EVT VT = N->getValueType(0); + SmallVector<SDValue, 8> Ops; + + EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + + // Keep track of what we encounter. + bool AnyInteger = false; + bool AnyFP = false; + for (const SDValue &Op : N->ops()) { + if (ISD::BITCAST == Op.getOpcode() && + !Op.getOperand(0).getValueType().isVector()) + Ops.push_back(Op.getOperand(0)); + else if (ISD::UNDEF == Op.getOpcode()) + Ops.push_back(ScalarUndef); + else + return SDValue(); + + // Note whether we encounter an integer or floating point scalar. + // If it's neither, bail out, it could be something weird like x86mmx. + EVT LastOpVT = Ops.back().getValueType(); + if (LastOpVT.isFloatingPoint()) + AnyFP = true; + else if (LastOpVT.isInteger()) + AnyInteger = true; + else + return SDValue(); + } + + // If any of the operands is a floating point scalar bitcast to a vector, + // use floating point types throughout, and bitcast everything. 
+ // Replace UNDEFs by another scalar UNDEF node, of the final desired type. + if (AnyFP) { + SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); + ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + if (AnyInteger) { + for (SDValue &Op : Ops) { + if (Op.getValueType() == SVT) + continue; + if (Op.isUndef()) + Op = ScalarUndef; + else + Op = DAG.getBitcast(SVT, Op); + } + } + } + + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, + VT.getSizeInBits() / SVT.getSizeInBits()); + return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops)); +} + +// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR +// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at +// most two distinct vectors the same size as the result, attempt to turn this +// into a legal shuffle. +static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + EVT OpVT = N->getOperand(0).getValueType(); + int NumElts = VT.getVectorNumElements(); + int NumOpElts = OpVT.getVectorNumElements(); + + SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT); + SmallVector<int, 8> Mask; + + for (SDValue Op : N->ops()) { + Op = peekThroughBitcasts(Op); + + // UNDEF nodes convert to UNDEF shuffle mask values. + if (Op.isUndef()) { + Mask.append((unsigned)NumOpElts, -1); + continue; + } + + if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + + // What vector are we extracting the subvector from and at what index? + SDValue ExtVec = Op.getOperand(0); + + // We want the EVT of the original extraction to correctly scale the + // extraction index. + EVT ExtVT = ExtVec.getValueType(); + ExtVec = peekThroughBitcasts(ExtVec); + + // UNDEF nodes convert to UNDEF shuffle mask values. + if (ExtVec.isUndef()) { + Mask.append((unsigned)NumOpElts, -1); + continue; + } + + if (!isa<ConstantSDNode>(Op.getOperand(1))) + return SDValue(); + int ExtIdx = Op.getConstantOperandVal(1); + + // Ensure that we are extracting a subvector from a vector the same + // size as the result. + if (ExtVT.getSizeInBits() != VT.getSizeInBits()) + return SDValue(); + + // Scale the subvector index to account for any bitcast. + int NumExtElts = ExtVT.getVectorNumElements(); + if (0 == (NumExtElts % NumElts)) + ExtIdx /= (NumExtElts / NumElts); + else if (0 == (NumElts % NumExtElts)) + ExtIdx *= (NumElts / NumExtElts); + else + return SDValue(); + + // At most we can reference 2 inputs in the final shuffle. + if (SV0.isUndef() || SV0 == ExtVec) { + SV0 = ExtVec; + for (int i = 0; i != NumOpElts; ++i) + Mask.push_back(i + ExtIdx); + } else if (SV1.isUndef() || SV1 == ExtVec) { + SV1 = ExtVec; + for (int i = 0; i != NumOpElts; ++i) + Mask.push_back(i + ExtIdx + NumElts); + } else { + return SDValue(); + } + } + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), + DAG.getBitcast(VT, SV1), Mask, DAG); +} + +SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { + // If we only have one input vector, we don't need to do any concatenation. + if (N->getNumOperands() == 1) + return N->getOperand(0); + + // Check if all of the operands are undefs. + EVT VT = N->getValueType(0); + if (ISD::allOperandsUndef(N)) + return DAG.getUNDEF(VT); + + // Optimize concat_vectors where all but the first of the vectors are undef. 
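+  // e.g. concat_vectors (concat_vectors X, Y), undef
+  //        --> concat_vectors X, Y, undef, undef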
+  if (std::all_of(std::next(N->op_begin()), N->op_end(),
+                  [](const SDValue &Op) { return Op.isUndef(); })) {
+    SDValue In = N->getOperand(0);
+    assert(In.getValueType().isVector() && "Must concat vectors");
+
+    // If the input is a concat_vectors, just make a larger concat by padding
+    // with smaller undefs.
+    if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
+      unsigned NumOps = N->getNumOperands() * In.getNumOperands();
+      SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
+      Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
+      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
+    }
+
+    SDValue Scalar = peekThroughOneUseBitcasts(In);
+
+    // concat_vectors(scalar_to_vector(scalar), undef) ->
+    //     scalar_to_vector(scalar)
+    if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+        Scalar.hasOneUse()) {
+      EVT SVT = Scalar.getValueType().getVectorElementType();
+      if (SVT == Scalar.getOperand(0).getValueType())
+        Scalar = Scalar.getOperand(0);
+    }
+
+    // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
+    if (!Scalar.getValueType().isVector()) {
+      // If the bitcast type isn't legal, it might be a trunc of a legal type;
+      // look through the trunc so we can still do the transform:
+      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
+      if (Scalar->getOpcode() == ISD::TRUNCATE &&
+          !TLI.isTypeLegal(Scalar.getValueType()) &&
+          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
+        Scalar = Scalar->getOperand(0);
+
+      EVT SclTy = Scalar.getValueType();
+
+      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
+        return SDValue();
+
+      // Bail out if the vector size is not a multiple of the scalar size.
+      if (VT.getSizeInBits() % SclTy.getSizeInBits())
+        return SDValue();
+
+      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
+      if (VNTNumElms < 2)
+        return SDValue();
+
+      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
+      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
+        return SDValue();
+
+      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
+      return DAG.getBitcast(VT, Res);
+    }
+  }
+
+  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
+  // We have already tested above for an UNDEF-only concatenation.
+  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
+  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
+  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
+    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
+  };
+  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
+    SmallVector<SDValue, 8> Opnds;
+    EVT SVT = VT.getScalarType();
+
+    EVT MinVT = SVT;
+    if (!SVT.isFloatingPoint()) {
+      // If the BUILD_VECTORs are built from integers, they may have different
+      // operand types. Get the smallest type and truncate all operands to it.
+      bool FoundMinVT = false;
+      for (const SDValue &Op : N->ops())
+        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+          EVT OpSVT = Op.getOperand(0).getValueType();
+          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
+          FoundMinVT = true;
+        }
+      assert(FoundMinVT && "Concat vector type mismatch");
+    }
+
+    for (const SDValue &Op : N->ops()) {
+      EVT OpVT = Op.getValueType();
+      unsigned NumElts = OpVT.getVectorNumElements();
+
+      if (ISD::UNDEF == Op.getOpcode())
+        Opnds.append(NumElts, DAG.getUNDEF(MinVT));
+
+      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+        if (SVT.isFloatingPoint()) {
+          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
+          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
+        } else {
+          for (unsigned i = 0; i != NumElts; ++i)
+            Opnds.push_back(
+                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
+        }
+      }
+    }
+
+    assert(VT.getVectorNumElements() == Opnds.size() &&
+           "Concat vector type mismatch");
+    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
+  }
+
+  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
+  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
+    return V;
+
+  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
+  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
+      return V;
+
+  // Type legalization of vectors and DAG canonicalization of VECTOR_SHUFFLE
+  // nodes often generate nop CONCAT_VECTORS nodes.
+  // Scan the CONCAT_VECTORS operands and look for a CONCAT operation that
+  // places the incoming vectors at the exact same location.
+  SDValue SingleSource = SDValue();
+  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
+
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    SDValue Op = N->getOperand(i);
+
+    if (Op.isUndef())
+      continue;
+
+    // Check if this is the identity extract:
+    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+      return SDValue();
+
+    // Find the single incoming vector for the extract_subvector.
+    if (SingleSource.getNode()) {
+      if (Op.getOperand(0) != SingleSource)
+        return SDValue();
+    } else {
+      SingleSource = Op.getOperand(0);
+
+      // Check the source type is the same as the type of the result.
+      // If not, this concat may extend the vector, so we cannot
+      // optimize it away.
+      if (SingleSource.getValueType() != N->getValueType(0))
+        return SDValue();
+    }
+
+    auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    // The extract index must be constant.
+    if (!CS)
+      return SDValue();
+
+    // Check that we are reading from the identity index.
+    unsigned IdentityIndex = i * PartNumElem;
+    if (CS->getAPIntValue() != IdentityIndex)
+      return SDValue();
+  }
+
+  if (SingleSource.getNode())
+    return SingleSource;
+
+  return SDValue();
+}
+
+// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
+// if the subvector can be sourced for free.
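+// e.g. with SubVT == v4i32:
+//   getSubVectorSrc (insert_subvector ?, X, C), C, v4i32 --> X
+//   getSubVectorSrc (concat_vectors A, B), 4, v4i32 --> B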
+static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) { + if (V.getOpcode() == ISD::INSERT_SUBVECTOR && + V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) { + return V.getOperand(1); + } + auto *IndexC = dyn_cast<ConstantSDNode>(Index); + if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && + V.getOperand(0).getValueType() == SubVT && + (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) { + uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements(); + return V.getOperand(SubIdx); + } + return SDValue(); +} + +static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, + SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue BinOp = Extract->getOperand(0); + unsigned BinOpcode = BinOp.getOpcode(); + if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1) + return SDValue(); + + EVT VecVT = BinOp.getValueType(); + SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1); + if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType()) + return SDValue(); + + SDValue Index = Extract->getOperand(1); + EVT SubVT = Extract->getValueType(0); + if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT)) + return SDValue(); + + SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT); + SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT); + + // TODO: We could handle the case where only 1 operand is being inserted by + // creating an extract of the other operand, but that requires checking + // number of uses and/or costs. + if (!Sub0 || !Sub1) + return SDValue(); + + // We are inserting both operands of the wide binop only to extract back + // to the narrow vector size. Eliminate all of the insert/extract: + // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y + return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1, + BinOp->getFlags()); +} + +/// If we are extracting a subvector produced by a wide binary operator try +/// to use a narrow binary operator and/or avoid concatenation and extraction. +static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { + // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share + // some of these bailouts with other transforms. + + if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG)) + return V; + + // The extract index must be a constant, so we can map it to a concat operand. + auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); + if (!ExtractIndexC) + return SDValue(); + + // We are looking for an optionally bitcasted wide vector binary operator + // feeding an extract subvector. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0)); + unsigned BOpcode = BinOp.getOpcode(); + if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1) + return SDValue(); + + // The binop must be a vector type, so we can extract some fraction of it. + EVT WideBVT = BinOp.getValueType(); + if (!WideBVT.isVector()) + return SDValue(); + + EVT VT = Extract->getValueType(0); + unsigned ExtractIndex = ExtractIndexC->getZExtValue(); + assert(ExtractIndex % VT.getVectorNumElements() == 0 && + "Extract index is not a multiple of the vector length."); + + // Bail out if this is not a proper multiple width extraction. 
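+  // e.g. extracting 128 bits from a 256-bit binop gives a clean 2:1 split,
+  // while a 96-bit extract from a 256-bit binop does not divide evenly.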
+ unsigned WideWidth = WideBVT.getSizeInBits(); + unsigned NarrowWidth = VT.getSizeInBits(); + if (WideWidth % NarrowWidth != 0) + return SDValue(); + + // Bail out if we are extracting a fraction of a single operation. This can + // occur because we potentially looked through a bitcast of the binop. + unsigned NarrowingRatio = WideWidth / NarrowWidth; + unsigned WideNumElts = WideBVT.getVectorNumElements(); + if (WideNumElts % NarrowingRatio != 0) + return SDValue(); + + // Bail out if the target does not support a narrower version of the binop. + EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(), + WideNumElts / NarrowingRatio); + if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT)) + return SDValue(); + + // If extraction is cheap, we don't need to look at the binop operands + // for concat ops. The narrow binop alone makes this transform profitable. + // We can't just reuse the original extract index operand because we may have + // bitcasted. + unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements(); + unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements(); + EVT ExtBOIdxVT = Extract->getOperand(1).getValueType(); + if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) && + BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) { + // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N) + SDLoc DL(Extract); + SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT); + SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, + BinOp.getOperand(0), NewExtIndex); + SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, + BinOp.getOperand(1), NewExtIndex); + SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, + BinOp.getNode()->getFlags()); + return DAG.getBitcast(VT, NarrowBinOp); + } + + // Only handle the case where we are doubling and then halving. A larger ratio + // may require more than two narrow binops to replace the wide binop. + if (NarrowingRatio != 2) + return SDValue(); + + // TODO: The motivating case for this transform is an x86 AVX1 target. That + // target has temptingly almost legal versions of bitwise logic ops in 256-bit + // flavors, but no other 256-bit integer support. This could be extended to + // handle any binop, but that may require fixing/adding other folds to avoid + // codegen regressions. + if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR) + return SDValue(); + + // We need at least one concatenation operation of a binop operand to make + // this transform worthwhile. The concat must double the input vector sizes. + auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue { + if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2) + return V.getOperand(ConcatOpNum); + return SDValue(); + }; + SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0))); + SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1))); + + if (SubVecL || SubVecR) { + // If a binop operand was not the result of a concat, we must extract a + // half-sized operand for our new narrow binop: + // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN + // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC) + // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN + SDLoc DL(Extract); + SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT); + SDValue X = SubVecL ? 
DAG.getBitcast(NarrowBVT, SubVecL) + : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, + BinOp.getOperand(0), IndexC); + + SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR) + : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, + BinOp.getOperand(1), IndexC); + + SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y); + return DAG.getBitcast(VT, NarrowBinOp); + } + + return SDValue(); +} + +/// If we are extracting a subvector from a wide vector load, convert to a +/// narrow load to eliminate the extraction: +/// (extract_subvector (load wide vector)) --> (load narrow vector) +static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { + // TODO: Add support for big-endian. The offset calculation must be adjusted. + if (DAG.getDataLayout().isBigEndian()) + return SDValue(); + + auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); + auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); + if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || + !ExtIdx) + return SDValue(); + + // Allow targets to opt-out. + EVT VT = Extract->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT)) + return SDValue(); + + // The narrow load will be offset from the base address of the old load if + // we are extracting from something besides index 0 (little-endian). + SDLoc DL(Extract); + SDValue BaseAddr = Ld->getOperand(1); + unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize(); + + // TODO: Use "BaseIndexOffset" to make this more effective. + SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL); + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset, + VT.getStoreSize()); + SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO); + DAG.makeEquivalentMemoryOrdering(Ld, NewLd); + return NewLd; +} + +SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { + EVT NVT = N->getValueType(0); + SDValue V = N->getOperand(0); + + // Extract from UNDEF is UNDEF. + if (V.isUndef()) + return DAG.getUNDEF(NVT); + + if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT)) + if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG)) + return NarrowLoad; + + // Combine an extract of an extract into a single extract_subvector. 
+  // ext (ext X, C), 0 --> ext X, C
+  SDValue Index = N->getOperand(1);
+  if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+      V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
+    if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
+                                    V.getConstantOperandVal(1)) &&
+        TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
+      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
+                         V.getOperand(1));
+    }
+  }
+
+  // Try to move vector bitcast after extract_subv by scaling extraction index:
+  // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
+  if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
+      V.getOperand(0).getValueType().isVector()) {
+    SDValue SrcOp = V.getOperand(0);
+    EVT SrcVT = SrcOp.getValueType();
+    unsigned SrcNumElts = SrcVT.getVectorNumElements();
+    unsigned DestNumElts = V.getValueType().getVectorNumElements();
+    if ((SrcNumElts % DestNumElts) == 0) {
+      unsigned SrcDestRatio = SrcNumElts / DestNumElts;
+      unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
+      EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+                                      NewExtNumElts);
+      if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
+        unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
+        SDLoc DL(N);
+        SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
+        SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
+                                         V.getOperand(0), NewIndex);
+        return DAG.getBitcast(NVT, NewExtract);
+      }
+    }
+    // TODO - handle (DestNumElts % SrcNumElts) == 0
+  }
+
+  // Combine:
+  //    (extract_subvec (concat V1, V2, ...), i)
+  // Into:
+  //    Vi if possible
+  // Only operand 0 is checked as 'concat' assumes all inputs of the same
+  // type.
+  if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
+      V.getOperand(0).getValueType() == NVT) {
+    unsigned Idx = N->getConstantOperandVal(1);
+    unsigned NumElems = NVT.getVectorNumElements();
+    assert((Idx % NumElems) == 0 &&
+           "IDX in concat is not a multiple of the result vector length.");
+    return V->getOperand(Idx / NumElems);
+  }
+
+  V = peekThroughBitcasts(V);
+
+  // If the input is a build vector, try to make a smaller build vector.
+  if (V.getOpcode() == ISD::BUILD_VECTOR) {
+    if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
+      EVT InVT = V.getValueType();
+      unsigned ExtractSize = NVT.getSizeInBits();
+      unsigned EltSize = InVT.getScalarSizeInBits();
+      // Only do this if we won't split any elements.
+      if (ExtractSize % EltSize == 0) {
+        unsigned NumElems = ExtractSize / EltSize;
+        EVT EltVT = InVT.getVectorElementType();
+        EVT ExtractVT = NumElems == 1 ? EltVT
+                                      : EVT::getVectorVT(*DAG.getContext(),
+                                                         EltVT, NumElems);
+        if ((Level < AfterLegalizeDAG ||
+             (NumElems == 1 ||
+              TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
+            (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
+          unsigned IdxVal = IdxC->getZExtValue();
+          IdxVal *= NVT.getScalarSizeInBits();
+          IdxVal /= EltSize;
+
+          if (NumElems == 1) {
+            SDValue Src = V->getOperand(IdxVal);
+            if (EltVT != Src.getValueType())
+              Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
+            return DAG.getBitcast(NVT, Src);
+          }
+
+          // Extract the pieces from the original build_vector.
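+          // e.g. extract_subvector (build_vector a, b, c, d), 2
+          //        --> build_vector c, d (when the element sizes match).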
+ SDValue BuildVec = DAG.getBuildVector( + ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems)); + return DAG.getBitcast(NVT, BuildVec); + } + } + } + } + + if (V.getOpcode() == ISD::INSERT_SUBVECTOR) { + // Handle only simple case where vector being inserted and vector + // being extracted are of same size. + EVT SmallVT = V.getOperand(1).getValueType(); + if (!NVT.bitsEq(SmallVT)) + return SDValue(); + + // Only handle cases where both indexes are constants. + auto *ExtIdx = dyn_cast<ConstantSDNode>(Index); + auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2)); + if (InsIdx && ExtIdx) { + // Combine: + // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) + // Into: + // indices are equal or bit offsets are equal => V1 + // otherwise => (extract_subvec V1, ExtIdx) + if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() == + ExtIdx->getZExtValue() * NVT.getScalarSizeInBits()) + return DAG.getBitcast(NVT, V.getOperand(1)); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, + DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), + Index); + } + } + + if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG)) + return NarrowBOp; + + if (SimplifyDemandedVectorElts(SDValue(N, 0))) + return SDValue(N, 0); + + return SDValue(); +} + +/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles +/// followed by concatenation. Narrow vector ops may have better performance +/// than wide ops, and this can unlock further narrowing of other vector ops. +/// Targets can invert this transform later if it is not profitable. +static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, + SelectionDAG &DAG) { + SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1); + if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 || + N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 || + !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef()) + return SDValue(); + + // Split the wide shuffle mask into halves. Any mask element that is accessing + // operand 1 is offset down to account for narrowing of the vectors. + ArrayRef<int> Mask = Shuf->getMask(); + EVT VT = Shuf->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + unsigned HalfNumElts = NumElts / 2; + SmallVector<int, 16> Mask0(HalfNumElts, -1); + SmallVector<int, 16> Mask1(HalfNumElts, -1); + for (unsigned i = 0; i != NumElts; ++i) { + if (Mask[i] == -1) + continue; + int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts; + if (i < HalfNumElts) + Mask0[i] = M; + else + Mask1[i - HalfNumElts] = M; + } + + // Ask the target if this is a valid transform. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + HalfNumElts); + if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) || + !TLI.isShuffleMaskLegal(Mask1, HalfVT)) + return SDValue(); + + // shuffle (concat X, undef), (concat Y, undef), Mask --> + // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1) + SDValue X = N0.getOperand(0), Y = N1.getOperand(0); + SDLoc DL(Shuf); + SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0); + SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1); +} + +// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, +// or turn a shuffle of a single concat into simpler shuffle then concat. 
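+// For example, with v2i32 pieces (illustrative):
+//   shuffle (concat A, B), (concat C, D), <0,1,6,7> --> concat A, D
+//   shuffle (concat A, B), undef, <1,0,u,u>
+//     --> concat (shuffle A, B, <1,0>), undef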
+static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + ArrayRef<int> Mask = SVN->getMask(); + + SmallVector<SDValue, 4> Ops; + EVT ConcatVT = N0.getOperand(0).getValueType(); + unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); + unsigned NumConcats = NumElts / NumElemsPerConcat; + + auto IsUndefMaskElt = [](int i) { return i == -1; }; + + // Special case: shuffle(concat(A,B)) can be more efficiently represented + // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high + // half vector elements. + if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() && + llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat), + IsUndefMaskElt)) { + N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), + N0.getOperand(1), + Mask.slice(0, NumElemsPerConcat)); + N1 = DAG.getUNDEF(ConcatVT); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); + } + + // Look at every vector that's inserted. We're looking for exact + // subvector-sized copies from a concatenated vector + for (unsigned I = 0; I != NumConcats; ++I) { + unsigned Begin = I * NumElemsPerConcat; + ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat); + + // Make sure we're dealing with a copy. + if (llvm::all_of(SubMask, IsUndefMaskElt)) { + Ops.push_back(DAG.getUNDEF(ConcatVT)); + continue; + } + + int OpIdx = -1; + for (int i = 0; i != (int)NumElemsPerConcat; ++i) { + if (IsUndefMaskElt(SubMask[i])) + continue; + if ((SubMask[i] % (int)NumElemsPerConcat) != i) + return SDValue(); + int EltOpIdx = SubMask[i] / NumElemsPerConcat; + if (0 <= OpIdx && EltOpIdx != OpIdx) + return SDValue(); + OpIdx = EltOpIdx; + } + assert(0 <= OpIdx && "Unknown concat_vectors op"); + + if (OpIdx < (int)N0.getNumOperands()) + Ops.push_back(N0.getOperand(OpIdx)); + else + Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands())); + } + + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); +} + +// Attempt to combine a shuffle of 2 inputs of 'scalar sources' - +// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. +// +// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always +// a simplification in some sense, but it isn't appropriate in general: some +// BUILD_VECTORs are substantially cheaper than others. The general case +// of a BUILD_VECTOR requires inserting each element individually (or +// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of +// all constants is a single constant pool load. A BUILD_VECTOR where each +// element is identical is a splat. A BUILD_VECTOR where most of the operands +// are undef lowers to a small number of element insertions. +// +// To deal with this, we currently use a bunch of mostly arbitrary heuristics. +// We don't fold shuffles where one side is a non-zero constant, and we don't +// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate +// non-constant operands. This seems to work out reasonably well in practice. 
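+// For example (all scalar operands distinct, so the heuristics above allow
+// the fold):
+//   shuffle (build_vector a, b, c, d), (build_vector e, f, g, h), <0,4,1,5>
+//     --> build_vector a, e, b, f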
+static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
+                                       SelectionDAG &DAG,
+                                       const TargetLowering &TLI) {
+  EVT VT = SVN->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+  SDValue N0 = SVN->getOperand(0);
+  SDValue N1 = SVN->getOperand(1);
+
+  if (!N0->hasOneUse())
+    return SDValue();
+
+  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
+  // discussed above.
+  if (!N1.isUndef()) {
+    if (!N1->hasOneUse())
+      return SDValue();
+
+    bool N0AnyConst = isAnyConstantBuildVector(N0);
+    bool N1AnyConst = isAnyConstantBuildVector(N1);
+    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
+      return SDValue();
+    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
+      return SDValue();
+  }
+
+  // If both inputs are splats of the same value then we can safely merge this
+  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
+  bool IsSplat = false;
+  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
+  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+  if (BV0 && BV1)
+    if (SDValue Splat0 = BV0->getSplatValue())
+      IsSplat = (Splat0 == BV1->getSplatValue());
+
+  SmallVector<SDValue, 8> Ops;
+  SmallSet<SDValue, 16> DuplicateOps;
+  for (int M : SVN->getMask()) {
+    SDValue Op = DAG.getUNDEF(VT.getScalarType());
+    if (M >= 0) {
+      int Idx = M < (int)NumElts ? M : M - NumElts;
+      SDValue &S = (M < (int)NumElts ? N0 : N1);
+      if (S.getOpcode() == ISD::BUILD_VECTOR) {
+        Op = S.getOperand(Idx);
+      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+        SDValue Op0 = S.getOperand(0);
+        Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
+      } else {
+        // Operand can't be combined - bail out.
+        return SDValue();
+      }
+    }
+
+    // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
+    // generating a splat; semantically, this is fine, but it's likely to
+    // generate low-quality code if the target can't reconstruct an appropriate
+    // shuffle.
+    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
+      if (!IsSplat && !DuplicateOps.insert(Op).second)
+        return SDValue();
+
+    Ops.push_back(Op);
+  }
+
+  // BUILD_VECTOR requires all inputs to be of the same type; find the
+  // maximum type and extend them all.
+  EVT SVT = VT.getScalarType();
+  if (SVT.isInteger())
+    for (SDValue &Op : Ops)
+      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+  if (SVT != VT.getScalarType())
+    for (SDValue &Op : Ops)
+      Op = TLI.isZExtFree(Op.getValueType(), SVT)
+               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
+               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
+  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
+}
+
+// Match shuffles that can be converted to any_vector_extend_in_reg.
+// This is often generated during legalization.
+// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
+// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
+static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
+                                            SelectionDAG &DAG,
+                                            const TargetLowering &TLI,
+                                            bool LegalOperations) {
+  EVT VT = SVN->getValueType(0);
+  bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+  // TODO Add support for big-endian when we have a test case.
+  if (!VT.isInteger() || IsBigEndian)
+    return SDValue();
+
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned EltSizeInBits = VT.getScalarSizeInBits();
+  ArrayRef<int> Mask = SVN->getMask();
+  SDValue N0 = SVN->getOperand(0);
+
+  // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
+  auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
+    for (unsigned i = 0; i != NumElts; ++i) {
+      if (Mask[i] < 0)
+        continue;
+      if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
+        continue;
+      return false;
+    }
+    return true;
+  };
+
+  // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
+  // power-of-2 extensions as they are the most likely.
+  for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
+    // Check for non-power-of-2 vector sizes.
+    if (NumElts % Scale != 0)
+      continue;
+    if (!isAnyExtend(Scale))
+      continue;
+
+    EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
+    EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
+    // Never create an illegal type. Only create unsupported operations if we
+    // are pre-legalization.
+    if (TLI.isTypeLegal(OutVT))
+      if (!LegalOperations ||
+          TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
+        return DAG.getBitcast(VT,
+                              DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
+                                          SDLoc(SVN), OutVT, N0));
+  }
+
+  return SDValue();
+}
+
+// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
+// each source element of a large type into the lowest elements of a smaller
+// destination type. This is often generated during legalization.
+// If the source node itself was a '*_extend_vector_inreg' node then we should
+// be able to remove it.
+static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
+                                        SelectionDAG &DAG) {
+  EVT VT = SVN->getValueType(0);
+  bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+  // TODO Add support for big-endian when we have a test case.
+  if (!VT.isInteger() || IsBigEndian)
+    return SDValue();
+
+  SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
+
+  unsigned Opcode = N0.getOpcode();
+  if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
+      Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
+      Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
+    return SDValue();
+
+  SDValue N00 = N0.getOperand(0);
+  ArrayRef<int> Mask = SVN->getMask();
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned EltSizeInBits = VT.getScalarSizeInBits();
+  unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
+  unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
+
+  if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
+    return SDValue();
+  unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
+
+  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
+  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
+  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
+  auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
+    for (unsigned i = 0; i != NumElts; ++i) {
+      if (Mask[i] < 0)
+        continue;
+      if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
+        continue;
+      return false;
+    }
+    return true;
+  };
+
+  // At the moment we just handle the case where we've truncated back to the
+  // same size as before the extension.
+  // TODO: handle more extension/truncation cases as cases arise.
+  if (EltSizeInBits != ExtSrcSizeInBits)
+    return SDValue();
+
+  // We can remove *extend_vector_inreg only if the truncation happens at
+  // the same scale as the extension.
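+  // e.g. (little-endian, ExtScale == 2):
+  //   shuffle<0,2,u,u> (v4i32 bitcast (v2i64 any_extend_vector_inreg X:v4i32))
+  //     --> X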
+ if (isTruncate(ExtScale)) + return DAG.getBitcast(VT, N00); + + return SDValue(); +} + +// Combine shuffles of splat-shuffles of the form: +// shuffle (shuffle V, undef, splat-mask), undef, M +// If splat-mask contains undef elements, we need to be careful about +// introducing undef's in the folded mask which are not the result of composing +// the masks of the shuffles. +static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, + SelectionDAG &DAG) { + if (!Shuf->getOperand(1).isUndef()) + return SDValue(); + auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0)); + if (!Splat || !Splat->isSplat()) + return SDValue(); + + ArrayRef<int> ShufMask = Shuf->getMask(); + ArrayRef<int> SplatMask = Splat->getMask(); + assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch"); + + // Prefer simplifying to the splat-shuffle, if possible. This is legal if + // every undef mask element in the splat-shuffle has a corresponding undef + // element in the user-shuffle's mask or if the composition of mask elements + // would result in undef. + // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask): + // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u] + // In this case it is not legal to simplify to the splat-shuffle because we + // may be exposing the users of the shuffle an undef element at index 1 + // which was not there before the combine. + // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u] + // In this case the composition of masks yields SplatMask, so it's ok to + // simplify to the splat-shuffle. + // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u] + // In this case the composed mask includes all undef elements of SplatMask + // and in addition sets element zero to undef. It is safe to simplify to + // the splat-shuffle. + auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask, + ArrayRef<int> SplatMask) { + for (unsigned i = 0, e = UserMask.size(); i != e; ++i) + if (UserMask[i] != -1 && SplatMask[i] == -1 && + SplatMask[UserMask[i]] != -1) + return false; + return true; + }; + if (CanSimplifyToExistingSplat(ShufMask, SplatMask)) + return Shuf->getOperand(0); + + // Create a new shuffle with a mask that is composed of the two shuffles' + // masks. + SmallVector<int, 32> NewMask; + for (int Idx : ShufMask) + NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]); + + return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat), + Splat->getOperand(0), Splat->getOperand(1), + NewMask); +} + +/// If the shuffle mask is taking exactly one element from the first vector +/// operand and passing through all other elements from the second vector +/// operand, return the index of the mask element that is choosing an element +/// from the first operand. Otherwise, return -1. +static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) { + int MaskSize = Mask.size(); + int EltFromOp0 = -1; + // TODO: This does not match if there are undef elements in the shuffle mask. + // Should we ignore undefs in the shuffle mask instead? The trade-off is + // removing an instruction (a shuffle), but losing the knowledge that some + // vector lanes are not needed. + for (int i = 0; i != MaskSize; ++i) { + if (Mask[i] >= 0 && Mask[i] < MaskSize) { + // We're looking for a shuffle of exactly one element from operand 0. + if (EltFromOp0 != -1) + return -1; + EltFromOp0 = i; + } else if (Mask[i] != i + MaskSize) { + // Nothing from operand 1 can change lanes. 
+ return -1; + } + } + return EltFromOp0; +} + +/// If a shuffle inserts exactly one element from a source vector operand into +/// another vector operand and we can access the specified element as a scalar, +/// then we can eliminate the shuffle. +static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, + SelectionDAG &DAG) { + // First, check if we are taking one element of a vector and shuffling that + // element into another vector. + ArrayRef<int> Mask = Shuf->getMask(); + SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end()); + SDValue Op0 = Shuf->getOperand(0); + SDValue Op1 = Shuf->getOperand(1); + int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask); + if (ShufOp0Index == -1) { + // Commute mask and check again. + ShuffleVectorSDNode::commuteMask(CommutedMask); + ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask); + if (ShufOp0Index == -1) + return SDValue(); + // Commute operands to match the commuted shuffle mask. + std::swap(Op0, Op1); + Mask = CommutedMask; + } + + // The shuffle inserts exactly one element from operand 0 into operand 1. + // Now see if we can access that element as a scalar via a real insert element + // instruction. + // TODO: We can try harder to locate the element as a scalar. Examples: it + // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant. + assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() && + "Shuffle mask value must be from operand 0"); + if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT) + return SDValue(); + + auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2)); + if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index]) + return SDValue(); + + // There's an existing insertelement with constant insertion index, so we + // don't need to check the legality/profitability of a replacement operation + // that differs at most in the constant value. The target should be able to + // lower any of those in a similar way. If not, legalization will expand this + // to a scalar-to-vector plus shuffle. + // + // Note that the shuffle may move the scalar from the position that the insert + // element used. Therefore, our new insert element occurs at the shuffle's + // mask index value, not the insert's index value. + // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C' + SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf), + Op0.getOperand(2).getValueType()); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(), + Op1, Op0.getOperand(1), NewInsIndex); +} + +/// If we have a unary shuffle of a shuffle, see if it can be folded away +/// completely. This has the potential to lose undef knowledge because the first +/// shuffle may not have an undef mask element where the second one does. So +/// only call this after doing simplifications based on demanded elements. +static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) { + // shuf (shuf0 X, Y, Mask0), undef, Mask + auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0)); + if (!Shuf0 || !Shuf->getOperand(1).isUndef()) + return SDValue(); + + ArrayRef<int> Mask = Shuf->getMask(); + ArrayRef<int> Mask0 = Shuf0->getMask(); + for (int i = 0, e = (int)Mask.size(); i != e; ++i) { + // Ignore undef elements. 
+    if (Mask[i] == -1)
+      continue;
+    assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
+
+    // Is the element of the shuffle operand chosen by this shuffle the same as
+    // the element chosen by the shuffle operand itself?
+    if (Mask0[Mask[i]] != Mask0[i])
+      return SDValue();
+  }
+  // Every element of this shuffle is identical to the result of the previous
+  // shuffle, so we can replace this value.
+  return Shuf->getOperand(0);
+}
+
+SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
+
+  // Canonicalize shuffle undef, undef -> undef
+  if (N0.isUndef() && N1.isUndef())
+    return DAG.getUNDEF(VT);
+
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+  // Canonicalize shuffle v, v -> v, undef
+  if (N0 == N1) {
+    SmallVector<int, 8> NewMask;
+    for (unsigned i = 0; i != NumElts; ++i) {
+      int Idx = SVN->getMaskElt(i);
+      if (Idx >= (int)NumElts) Idx -= NumElts;
+      NewMask.push_back(Idx);
+    }
+    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
+  }
+
+  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
+  if (N0.isUndef())
+    return DAG.getCommutedVectorShuffle(*SVN);
+
+  // Remove references to rhs if it is undef
+  if (N1.isUndef()) {
+    bool Changed = false;
+    SmallVector<int, 8> NewMask;
+    for (unsigned i = 0; i != NumElts; ++i) {
+      int Idx = SVN->getMaskElt(i);
+      if (Idx >= (int)NumElts) {
+        Idx = -1;
+        Changed = true;
+      }
+      NewMask.push_back(Idx);
+    }
+    if (Changed)
+      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
+  }
+
+  if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
+    return InsElt;
+
+  // A shuffle of a single vector that is a splatted value can always be folded.
+  if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
+    return V;
+
+  // If it is a splat, check if the argument vector is another splat or a
+  // build_vector.
+  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
+    int SplatIndex = SVN->getSplatIndex();
+    if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
+        TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
+      // splat (vector_bo L, R), Index -->
+      // splat (scalar_bo (extelt L, Index), (extelt R, Index))
+      SDValue L = N0.getOperand(0), R = N0.getOperand(1);
+      SDLoc DL(N);
+      EVT EltVT = VT.getScalarType();
+      SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
+      SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
+      SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
+      SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
+                                  N0.getNode()->getFlags());
+      SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
+      SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
+      return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
+    }
+
+    // If this is a bit convert that changes the element type of the vector but
+    // not the number of vector elements, look through it.  Be careful not to
+    // look through conversions that change things like v4f32 to v2f64.
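+    // e.g. a splat of (v4i32 bitcast (v4f32 X)) keeps NumElts == 4 and may be
+    // looked through; a v2f64 -> v4f32 bitcast changes the element count and
+    // may not.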
+ SDNode *V = N0.getNode(); + if (V->getOpcode() == ISD::BITCAST) { + SDValue ConvInput = V->getOperand(0); + if (ConvInput.getValueType().isVector() && + ConvInput.getValueType().getVectorNumElements() == NumElts) + V = ConvInput.getNode(); + } + + if (V->getOpcode() == ISD::BUILD_VECTOR) { + assert(V->getNumOperands() == NumElts && + "BUILD_VECTOR has wrong number of operands"); + SDValue Base; + bool AllSame = true; + for (unsigned i = 0; i != NumElts; ++i) { + if (!V->getOperand(i).isUndef()) { + Base = V->getOperand(i); + break; + } + } + // Splat of <u, u, u, u>, return <u, u, u, u> + if (!Base.getNode()) + return N0; + for (unsigned i = 0; i != NumElts; ++i) { + if (V->getOperand(i) != Base) { + AllSame = false; + break; + } + } + // Splat of <x, x, x, x>, return <x, x, x, x> + if (AllSame) + return N0; + + // Canonicalize any other splat as a build_vector. + SDValue Splatted = V->getOperand(SplatIndex); + SmallVector<SDValue, 8> Ops(NumElts, Splatted); + SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops); + + // We may have jumped through bitcasts, so the type of the + // BUILD_VECTOR may not match the type of the shuffle. + if (V->getValueType(0) != VT) + NewBV = DAG.getBitcast(VT, NewBV); + return NewBV; + } + } + + // Simplify source operands based on shuffle mask. + if (SimplifyDemandedVectorElts(SDValue(N, 0))) + return SDValue(N, 0); + + // This is intentionally placed after demanded elements simplification because + // it could eliminate knowledge of undef elements created by this shuffle. + if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN)) + return ShufOp; + + // Match shuffles that can be converted to any_vector_extend_in_reg. + if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations)) + return V; + + // Combine "truncate_vector_in_reg" style shuffles. + if (SDValue V = combineTruncationShuffle(SVN, DAG)) + return V; + + if (N0.getOpcode() == ISD::CONCAT_VECTORS && + Level < AfterLegalizeVectorOps && + (N1.isUndef() || + (N1.getOpcode() == ISD::CONCAT_VECTORS && + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) { + if (SDValue V = partitionShuffleOfConcats(N, DAG)) + return V; + } + + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - + // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. + if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) + if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI)) + return Res; + + // If this shuffle only has a single input that is a bitcasted shuffle, + // attempt to merge the 2 shuffles and suitably bitcast the inputs/output + // back to their original types. + if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && + N1.isUndef() && Level < AfterLegalizeVectorOps && + TLI.isTypeLegal(VT)) { + auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) { + if (Scale == 1) + return SmallVector<int, 8>(Mask.begin(), Mask.end()); + + SmallVector<int, 8> NewMask; + for (int M : Mask) + for (int s = 0; s != Scale; ++s) + NewMask.push_back(M < 0 ? -1 : Scale * M + s); + return NewMask; + }; + + SDValue BC0 = peekThroughOneUseBitcasts(N0); + if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) { + EVT SVT = VT.getScalarType(); + EVT InnerVT = BC0->getValueType(0); + EVT InnerSVT = InnerVT.getScalarType(); + + // Determine which shuffle works with the smaller scalar type. + EVT ScaleVT = SVT.bitsLT(InnerSVT) ? 
VT : InnerVT; + EVT ScaleSVT = ScaleVT.getScalarType(); + + if (TLI.isTypeLegal(ScaleVT) && + 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) && + 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) { + int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits(); + int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits(); + + // Scale the shuffle masks to the smaller scalar type. + ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0); + SmallVector<int, 8> InnerMask = + ScaleShuffleMask(InnerSVN->getMask(), InnerScale); + SmallVector<int, 8> OuterMask = + ScaleShuffleMask(SVN->getMask(), OuterScale); + + // Merge the shuffle masks. + SmallVector<int, 8> NewMask; + for (int M : OuterMask) + NewMask.push_back(M < 0 ? -1 : InnerMask[M]); + + // Test for shuffle mask legality over both commutations. + SDValue SV0 = BC0->getOperand(0); + SDValue SV1 = BC0->getOperand(1); + bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); + if (!LegalMask) { + std::swap(SV0, SV1); + ShuffleVectorSDNode::commuteMask(NewMask); + LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); + } + + if (LegalMask) { + SV0 = DAG.getBitcast(ScaleVT, SV0); + SV1 = DAG.getBitcast(ScaleVT, SV1); + return DAG.getBitcast( + VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); + } + } + } + } + + // Canonicalize shuffles according to rules: + // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) + // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) + // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) + if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && + N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + TLI.isTypeLegal(VT)) { + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(N1->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + SDValue SV0 = N1->getOperand(0); + SDValue SV1 = N1->getOperand(1); + bool HasSameOp0 = N0 == SV0; + bool IsSV1Undef = SV1.isUndef(); + if (HasSameOp0 || IsSV1Undef || N0 == SV1) + // Commute the operands of this shuffle so that next rule + // will trigger. + return DAG.getCommutedVectorShuffle(*SVN); + } + + // Try to fold according to rules: + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) + // Don't try to fold shuffles with illegal type. + // Only fold if this shuffle is the only user of the other shuffle. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) && + Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { + ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); + + // Don't try to fold splats; they're likely to simplify somehow, or they + // might be free. + if (OtherSV->isSplat()) + return SDValue(); + + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(OtherSV->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + SDValue SV0, SV1; + SmallVector<int, 4> Mask; + // Compute the combined shuffle mask for a shuffle with SV0 as the first + // operand, and SV1 as the second operand. + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx < 0) { + // Propagate Undef. + Mask.push_back(Idx); + continue; + } + + SDValue CurrentVec; + if (Idx < (int)NumElts) { + // This shuffle index refers to the inner shuffle N0. 
Look up the inner
+        // shuffle mask to identify which vector is actually referenced.
+        Idx = OtherSV->getMaskElt(Idx);
+        if (Idx < 0) {
+          // Propagate Undef.
+          Mask.push_back(Idx);
+          continue;
+        }
+
+        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
+                                           : OtherSV->getOperand(1);
+      } else {
+        // This shuffle index references an element within N1.
+        CurrentVec = N1;
+      }
+
+      // Simple case where 'CurrentVec' is UNDEF.
+      if (CurrentVec.isUndef()) {
+        Mask.push_back(-1);
+        continue;
+      }
+
+      // Canonicalize the shuffle index. We don't know yet if CurrentVec
+      // will be the first or second operand of the combined shuffle.
+      Idx = Idx % NumElts;
+      if (!SV0.getNode() || SV0 == CurrentVec) {
+        // Ok. CurrentVec is the left hand side.
+        // Update the mask accordingly.
+        SV0 = CurrentVec;
+        Mask.push_back(Idx);
+        continue;
+      }
+
+      // Bail out if we cannot convert the shuffle pair into a single shuffle.
+      if (SV1.getNode() && SV1 != CurrentVec)
+        return SDValue();
+
+      // Ok. CurrentVec is the right hand side.
+      // Update the mask accordingly.
+      SV1 = CurrentVec;
+      Mask.push_back(Idx + NumElts);
+    }
+
+    // Check if all indices in Mask are Undef. If so, propagate Undef.
+    bool isUndefMask = true;
+    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
+      isUndefMask &= Mask[i] < 0;
+
+    if (isUndefMask)
+      return DAG.getUNDEF(VT);
+
+    if (!SV0.getNode())
+      SV0 = DAG.getUNDEF(VT);
+    if (!SV1.getNode())
+      SV1 = DAG.getUNDEF(VT);
+
+    // Avoid introducing shuffles with illegal mask.
+    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
+    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
+    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
+    return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
+  }
+
+  if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
+    return V;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
+  SDValue InVal = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
+  // with a VECTOR_SHUFFLE and possible truncate.
+  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+    SDValue InVec = InVal->getOperand(0);
+    SDValue EltNo = InVal->getOperand(1);
+    auto InVecT = InVec.getValueType();
+    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
+      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
+      int Elt = C0->getZExtValue();
+      NewMask[0] = Elt;
+      // If we have an implicit truncate, do the truncate here as long as
+      // it's legal.
+      if (VT.getScalarType() != InVal.getValueType() &&
+          InVal.getValueType().isScalarInteger() &&
+          isTypeLegal(VT.getScalarType())) {
+        SDValue Val =
+            DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
+      }
+      if (VT.getScalarType() == InVecT.getScalarType() &&
+          VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
+        SDValue LegalShuffle =
+            TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
+                                        DAG.getUNDEF(InVecT), NewMask, DAG);
+        if (LegalShuffle) {
+          // If the initial vector is the correct size this shuffle is a
+          // valid result.
+          if (VT == InVecT)
+            return LegalShuffle;
+          // If not, we must truncate the vector.
+          if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
+            MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+            SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
+            EVT SubVT =
+                EVT::getVectorVT(*DAG.getContext(),
+                                 InVecT.getVectorElementType(),
+                                 VT.getVectorNumElements());
+            return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
+                               LegalShuffle, ZeroIdx);
+          }
+        }
+      }
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue N2 = N->getOperand(2);
+
+  // If inserting an UNDEF, just return the original vector.
+  if (N1.isUndef())
+    return N0;
+
+  // If this is an insert of an extracted vector into an undef vector, we can
+  // just use the input to the extract.
+  if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+      N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
+    return N1.getOperand(0);
+
+  // If we are inserting a bitcast value into an undef, with the same
+  // number of elements, just use the bitcast input of the extract.
+  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
+  //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
+  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
+      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+      N1.getOperand(0).getOperand(1) == N2 &&
+      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
+          VT.getVectorNumElements() &&
+      N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
+          VT.getSizeInBits()) {
+    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
+  }
+
+  // If both N1 and N2 are bitcast values on which insert_subvector
+  // would make sense, pull the bitcast through.
+  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
+  //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
+  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
+    SDValue CN0 = N0.getOperand(0);
+    SDValue CN1 = N1.getOperand(0);
+    EVT CN0VT = CN0.getValueType();
+    EVT CN1VT = CN1.getValueType();
+    if (CN0VT.isVector() && CN1VT.isVector() &&
+        CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
+        CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
+      SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
+                                      CN0.getValueType(), CN0, CN1, N2);
+      return DAG.getBitcast(VT, NewINSERT);
+    }
+  }
+
+  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
+  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
+  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
+  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
+      N0.getOperand(1).getValueType() == N1.getValueType() &&
+      N0.getOperand(2) == N2)
+    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
+                       N1, N2);
+
+  // Eliminate an intermediate insert into an undef vector:
+  // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
+  // insert_subvector undef, X, N2
+  if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
+      N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
+    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
+                       N1.getOperand(1), N2);
+
+  if (!isa<ConstantSDNode>(N2))
+    return SDValue();
+
+  uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+  // Push subvector bitcasts to the output, adjusting the index as we go.
+ // insert_subvector(bitcast(v), bitcast(s), c1) + // -> bitcast(insert_subvector(v, s, c2)) + if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) && + N1.getOpcode() == ISD::BITCAST) { + SDValue N0Src = peekThroughBitcasts(N0); + SDValue N1Src = peekThroughBitcasts(N1); + EVT N0SrcSVT = N0Src.getValueType().getScalarType(); + EVT N1SrcSVT = N1Src.getValueType().getScalarType(); + if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) && + N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) { + EVT NewVT; + SDLoc DL(N); + SDValue NewIdx; + MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); + LLVMContext &Ctx = *DAG.getContext(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) { + unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits(); + NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale); + NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT); + } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) { + unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits; + if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) { + NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale); + NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT); + } + } + if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) { + SDValue Res = DAG.getBitcast(NewVT, N0Src); + Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx); + return DAG.getBitcast(VT, Res); + } + } + } + + // Canonicalize insert_subvector dag nodes. + // Example: + // (insert_subvector (insert_subvector A, Idx0), Idx1) + // -> (insert_subvector (insert_subvector A, Idx1), Idx0) + if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() && + N1.getValueType() == N0.getOperand(1).getValueType() && + isa<ConstantSDNode>(N0.getOperand(2))) { + unsigned OtherIdx = N0.getConstantOperandVal(2); + if (InsIdx < OtherIdx) { + // Swap nodes. + SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, + N0.getOperand(0), N1, N2); + AddToWorklist(NewOp.getNode()); + return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()), + VT, NewOp, N0.getOperand(1), N0.getOperand(2)); + } + } + + // If the input vector is a concatenation, and the insert replaces + // one of the pieces, we can optimize into a single concat_vectors. + if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() && + N0.getOperand(0).getValueType() == N1.getValueType()) { + unsigned Factor = N1.getValueType().getVectorNumElements(); + + SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end()); + Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1; + + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); + } + + // Simplify source operands based on insertion. 
+  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+
+  // fold (fp_to_fp16 (fp16_to_fp op)) -> op
+  if (N0->getOpcode() == ISD::FP16_TO_FP)
+    return N0->getOperand(0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+
+  // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
+  if (N0->getOpcode() == ISD::AND) {
+    ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
+    if (AndConst && AndConst->getAPIntValue() == 0xffff) {
+      return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
+                         N0.getOperand(0));
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N0.getValueType();
+  unsigned Opcode = N->getOpcode();
+
+  // VECREDUCE over 1-element vector is just an extract.
+  if (VT.getVectorNumElements() == 1) {
+    SDLoc dl(N);
+    SDValue Res = DAG.getNode(
+        ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
+        DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+    if (Res.getValueType() != N->getValueType(0))
+      Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
+    return Res;
+  }
+
+  // On a boolean vector an and/or reduction is the same as a umin/umax
+  // reduction. Convert them if the latter is legal while the former isn't.
+  if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
+    unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
+        ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
+    if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
+        TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
+        DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
+      return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
+  }
+
+  return SDValue();
+}
+
+/// Returns a vector_shuffle if it is able to transform an AND to a
+/// vector_shuffle with the destination vector and a zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
+///      vector_shuffle V, Zero, <0, 4, 2, 4>
+SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
+
+  EVT VT = N->getValueType(0);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = peekThroughBitcasts(N->getOperand(1));
+  SDLoc DL(N);
+
+  // Make sure we're not running after operation legalization where it
+  // may have custom lowered the vector shuffles.
+  if (LegalOperations)
+    return SDValue();
+
+  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
+    return SDValue();
+
+  EVT RVT = RHS.getValueType();
+  unsigned NumElts = RHS.getNumOperands();
+
+  // Attempt to create a valid clear mask, splitting the mask into
+  // sub elements and checking to see if each is
+  // all zeros or all ones - suitable for shuffle masking.
+  auto BuildClearMask = [&](int Split) {
+    int NumSubElts = NumElts * Split;
+    int NumSubBits = RVT.getScalarSizeInBits() / Split;
+
+    SmallVector<int, 8> Indices;
+    for (int i = 0; i != NumSubElts; ++i) {
+      int EltIdx = i / Split;
+      int SubIdx = i % Split;
+      SDValue Elt = RHS.getOperand(EltIdx);
+      if (Elt.isUndef()) {
+        Indices.push_back(-1);
+        continue;
+      }
+
+      APInt Bits;
+      if (isa<ConstantSDNode>(Elt))
+        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
+      else if (isa<ConstantFPSDNode>(Elt))
+        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
+      else
+        return SDValue();
+
+      // Extract the sub element from the constant bit mask.
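+      // Worked example (illustrative): splitting a v2i16 mask constant
+      // <0xFF00, 0x00FF> to bytes (Split == 2), sub element 0 of the first
+      // lane is 0xFF on big-endian targets but 0x00 on little-endian ones,
+      // which is why the shift amount below is mirrored for big-endian.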
+ if (DAG.getDataLayout().isBigEndian()) { + Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits); + } else { + Bits.lshrInPlace(SubIdx * NumSubBits); + } + + if (Split > 1) + Bits = Bits.trunc(NumSubBits); + + if (Bits.isAllOnesValue()) + Indices.push_back(i); + else if (Bits == 0) + Indices.push_back(i + NumSubElts); + else + return SDValue(); + } + + // Let's see if the target supports this vector_shuffle. + EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits); + EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts); + if (!TLI.isVectorClearMaskLegal(Indices, ClearVT)) + return SDValue(); + + SDValue Zero = DAG.getConstant(0, DL, ClearVT); + return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL, + DAG.getBitcast(ClearVT, LHS), + Zero, Indices)); + }; + + // Determine maximum split level (byte level masking). + int MaxSplit = 1; + if (RVT.getScalarSizeInBits() % 8 == 0) + MaxSplit = RVT.getScalarSizeInBits() / 8; + + for (int Split = 1; Split <= MaxSplit; ++Split) + if (RVT.getScalarSizeInBits() % Split == 0) + if (SDValue S = BuildClearMask(Split)) + return S; + + return SDValue(); +} + +/// If a vector binop is performed on splat values, it may be profitable to +/// extract, scalarize, and insert/splat. +static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + unsigned Opcode = N->getOpcode(); + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // TODO: Remove/replace the extract cost check? If the elements are available + // as scalars, then there may be no extract cost. Should we ask if + // inserting a scalar back into a vector is cheap instead? + int Index0, Index1; + SDValue Src0 = DAG.getSplatSourceVector(N0, Index0); + SDValue Src1 = DAG.getSplatSourceVector(N1, Index1); + if (!Src0 || !Src1 || Index0 != Index1 || + Src0.getValueType().getVectorElementType() != EltVT || + Src1.getValueType().getVectorElementType() != EltVT || + !TLI.isExtractVecEltCheap(VT, Index0) || + !TLI.isOperationLegalOrCustom(Opcode, EltVT)) + return SDValue(); + + SDLoc DL(N); + SDValue IndexC = + DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())); + SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC); + SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC); + SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()); + + // If all lanes but 1 are undefined, no need to splat the scalar result. + // TODO: Keep track of undefs and use that info in the general case. + if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() && + count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 && + count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) { + // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) --> + // build_vec ..undef, (bo X, Y), undef... + SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT)); + Ops[Index0] = ScalarBO; + return DAG.getBuildVector(VT, DL, Ops); + } + + // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index + SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO); + return DAG.getBuildVector(VT, DL, Ops); +} + +/// Visit a binary vector operation, like ADD. 
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { + assert(N->getValueType(0).isVector() && + "SimplifyVBinOp only works on vectors!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Ops[] = {LHS, RHS}; + EVT VT = N->getValueType(0); + unsigned Opcode = N->getOpcode(); + + // See if we can constant fold the vector operation. + if (SDValue Fold = DAG.FoldConstantVectorArithmetic( + Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) + return Fold; + + // Move unary shuffles with identical masks after a vector binop: + // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask)) + // --> shuffle (VBinOp A, B), Undef, Mask + // This does not require type legality checks because we are creating the + // same types of operations that are in the original sequence. We do have to + // restrict ops like integer div that have immediate UB (eg, div-by-zero) + // though. This code is adapted from the identical transform in instcombine. + if (Opcode != ISD::UDIV && Opcode != ISD::SDIV && + Opcode != ISD::UREM && Opcode != ISD::SREM && + Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) { + auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS); + auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS); + if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) && + LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() && + (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) { + SDLoc DL(N); + SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0), + RHS.getOperand(0), N->getFlags()); + SDValue UndefV = LHS.getOperand(1); + return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask()); + } + } + + // The following pattern is likely to emerge with vector reduction ops. Moving + // the binary operation ahead of insertion may allow using a narrower vector + // instruction that has better performance than the wide version of the op: + // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z + if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() && + RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() && + LHS.getOperand(2) == RHS.getOperand(2) && + (LHS.hasOneUse() || RHS.hasOneUse())) { + SDValue X = LHS.getOperand(1); + SDValue Y = RHS.getOperand(1); + SDValue Z = LHS.getOperand(2); + EVT NarrowVT = X.getValueType(); + if (NarrowVT == Y.getValueType() && + TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) { + // (binop undef, undef) may not return undef, so compute that result. + SDLoc DL(N); + SDValue VecC = + DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT)); + SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y); + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z); + } + } + + // Make sure all but the first op are undef or constant. + auto ConcatWithConstantOrUndef = [](SDValue Concat) { + return Concat.getOpcode() == ISD::CONCAT_VECTORS && + std::all_of(std::next(Concat->op_begin()), Concat->op_end(), + [](const SDValue &Op) { + return Op.isUndef() || + ISD::isBuildVectorOfConstantSDNodes(Op.getNode()); + }); + }; + + // The following pattern is likely to emerge with vector reduction ops. 
Moving
+  // the binary operation ahead of the concat may allow using a narrower vector
+  // instruction that has better performance than the wide version of the op:
+  // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
+  //   concat (VBinOp X, Y), VecC
+  if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
+      (LHS.hasOneUse() || RHS.hasOneUse())) {
+    EVT NarrowVT = LHS.getOperand(0).getValueType();
+    if (NarrowVT == RHS.getOperand(0).getValueType() &&
+        TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
+      SDLoc DL(N);
+      unsigned NumOperands = LHS.getNumOperands();
+      SmallVector<SDValue, 4> ConcatOps;
+      for (unsigned i = 0; i != NumOperands; ++i) {
+        // For operands 1 and up, this constant folds, because those operands
+        // are all undef or constant.
+        ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
+                                        RHS.getOperand(i)));
+      }
+
+      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+    }
+  }
+
+  if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
+    return V;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
+                                    SDValue N2) {
+  assert(N0.getOpcode() == ISD::SETCC &&
+         "First argument must be a SetCC node!");
+
+  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
+                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
+  // If we got a simplified select_cc node back from SimplifySelectCC, then
+  // break it down into a new SETCC node, and a new SELECT node, and then return
+  // the SELECT node, since we were called with a SELECT node.
+  if (SCC.getNode()) {
+    // Check to see if we got a select_cc back (to turn into setcc/select).
+    // Otherwise, just return whatever node we got back, like fabs.
+    if (SCC.getOpcode() == ISD::SELECT_CC) {
+      const SDNodeFlags Flags = N0.getNode()->getFlags();
+      SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
+                                  N0.getValueType(),
+                                  SCC.getOperand(0), SCC.getOperand(1),
+                                  SCC.getOperand(4), Flags);
+      AddToWorklist(SETCC.getNode());
+      SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
+                                         SCC.getOperand(2), SCC.getOperand(3));
+      SelectNode->setFlags(Flags);
+      return SelectNode;
+    }
+
+    return SCC;
+  }
+  return SDValue();
+}
+
+/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
+/// being selected between, see if we can simplify the select. Callers of this
+/// should assume that TheSelect is deleted if this returns true. As such, they
+/// should return the appropriate thing (e.g. the node) back to the top-level of
+/// the DAG combiner loop to avoid it being looked at.
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
+                                    SDValue RHS) {
+  // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
+  // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
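+  // Concretely (an illustrative instance of the fold): in
+  //   (select (setcc x, 0.0, setolt), NaN, (fsqrt x))
+  // every x that makes the condition true also makes fsqrt(x) return NaN
+  // (IEEE-754 sqrt of a negative operand is a quiet NaN), so the whole
+  // select can be replaced by (fsqrt x).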
+ if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) { + if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) { + // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?)) + SDValue Sqrt = RHS; + ISD::CondCode CC; + SDValue CmpLHS; + const ConstantFPSDNode *Zero = nullptr; + + if (TheSelect->getOpcode() == ISD::SELECT_CC) { + CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get(); + CmpLHS = TheSelect->getOperand(0); + Zero = isConstOrConstSplatFP(TheSelect->getOperand(1)); + } else { + // SELECT or VSELECT + SDValue Cmp = TheSelect->getOperand(0); + if (Cmp.getOpcode() == ISD::SETCC) { + CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get(); + CmpLHS = Cmp.getOperand(0); + Zero = isConstOrConstSplatFP(Cmp.getOperand(1)); + } + } + if (Zero && Zero->isZero() && + Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT || + CC == ISD::SETULT || CC == ISD::SETLT)) { + // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) + CombineTo(TheSelect, Sqrt); + return true; + } + } + } + // Cannot simplify select with vector condition + if (TheSelect->getOperand(0).getValueType().isVector()) return false; + + // If this is a select from two identical things, try to pull the operation + // through the select. + if (LHS.getOpcode() != RHS.getOpcode() || + !LHS.hasOneUse() || !RHS.hasOneUse()) + return false; + + // If this is a load and the token chain is identical, replace the select + // of two loads with a load through a select of the address to load from. + // This triggers in things like "select bool X, 10.0, 123.0" after the FP + // constants have been dropped into the constant pool. + if (LHS.getOpcode() == ISD::LOAD) { + LoadSDNode *LLD = cast<LoadSDNode>(LHS); + LoadSDNode *RLD = cast<LoadSDNode>(RHS); + + // Token chains must be identical. + if (LHS.getOperand(0) != RHS.getOperand(0) || + // Do not let this transformation reduce the number of volatile loads. + // Be conservative for atomics for the moment + // TODO: This does appear to be legal for unordered atomics (see D66309) + !LLD->isSimple() || !RLD->isSimple() || + // FIXME: If either is a pre/post inc/dec load, + // we'd need to split out the address adjustment. + LLD->isIndexed() || RLD->isIndexed() || + // If this is an EXTLOAD, the VT's must match. + LLD->getMemoryVT() != RLD->getMemoryVT() || + // If this is an EXTLOAD, the kind of extension must match. + (LLD->getExtensionType() != RLD->getExtensionType() && + // The only exception is if one of the extensions is anyext. + LLD->getExtensionType() != ISD::EXTLOAD && + RLD->getExtensionType() != ISD::EXTLOAD) || + // FIXME: this discards src value information. This is + // over-conservative. It would be beneficial to be able to remember + // both potential memory locations. Since we are discarding + // src value info, don't do the transformation if the memory + // locations are not in the default address space. + LLD->getPointerInfo().getAddrSpace() != 0 || + RLD->getPointerInfo().getAddrSpace() != 0 || + // We can't produce a CMOV of a TargetFrameIndex since we won't + // generate the address generation required. + LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex || + RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex || + !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(), + LLD->getBasePtr().getValueType())) + return false; + + // The loads must not depend on one another. + if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD)) + return false; + + // Check that the select condition doesn't reach either load. 
If so, + // folding this will induce a cycle into the DAG. If not, this is safe to + // xform, so create a select of the addresses. + + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + + // Always fail if LLD and RLD are not independent. TheSelect is a + // predecessor to all Nodes in question so we need not search past it. + + Visited.insert(TheSelect); + Worklist.push_back(LLD); + Worklist.push_back(RLD); + + if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) || + SDNode::hasPredecessorHelper(RLD, Visited, Worklist)) + return false; + + SDValue Addr; + if (TheSelect->getOpcode() == ISD::SELECT) { + // We cannot do this optimization if any pair of {RLD, LLD} is a + // predecessor to {RLD, LLD, CondNode}. As we've already compared the + // Loads, we only need to check if CondNode is a successor to one of the + // loads. We can further avoid this if there's no use of their chain + // value. + SDNode *CondNode = TheSelect->getOperand(0).getNode(); + Worklist.push_back(CondNode); + + if ((LLD->hasAnyUseOfValue(1) && + SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) || + (RLD->hasAnyUseOfValue(1) && + SDNode::hasPredecessorHelper(RLD, Visited, Worklist))) + return false; + + Addr = DAG.getSelect(SDLoc(TheSelect), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); + } else { // Otherwise SELECT_CC + // We cannot do this optimization if any pair of {RLD, LLD} is a + // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared + // the Loads, we only need to check if CondLHS/CondRHS is a successor to + // one of the loads. We can further avoid this if there's no use of their + // chain value. + + SDNode *CondLHS = TheSelect->getOperand(0).getNode(); + SDNode *CondRHS = TheSelect->getOperand(1).getNode(); + Worklist.push_back(CondLHS); + Worklist.push_back(CondRHS); + + if ((LLD->hasAnyUseOfValue(1) && + SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) || + (RLD->hasAnyUseOfValue(1) && + SDNode::hasPredecessorHelper(RLD, Visited, Worklist))) + return false; + + Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), + TheSelect->getOperand(1), + LLD->getBasePtr(), RLD->getBasePtr(), + TheSelect->getOperand(4)); + } + + SDValue Load; + // It is safe to replace the two loads if they have different alignments, + // but the new load must be the minimum (most restrictive) alignment of the + // inputs. + unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment()); + MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags(); + if (!RLD->isInvariant()) + MMOFlags &= ~MachineMemOperand::MOInvariant; + if (!RLD->isDereferenceable()) + MMOFlags &= ~MachineMemOperand::MODereferenceable; + if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { + // FIXME: Discards pointer and AA info. + Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), + LLD->getChain(), Addr, MachinePointerInfo(), Alignment, + MMOFlags); + } else { + // FIXME: Discards pointer and AA info. + Load = DAG.getExtLoad( + LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() + : LLD->getExtensionType(), + SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr, + MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags); + } + + // Users of the select now use the result of the load. + CombineTo(TheSelect, Load); + + // Users of the old loads now use the new load's chain. 
We know the
+    // old-load value is dead now.
+    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+    return true;
+  }
+
+  return false;
+}
+
+/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
+/// bitwise 'and'.
+SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
+                                            SDValue N1, SDValue N2, SDValue N3,
+                                            ISD::CondCode CC) {
+  // If this is a select where the false operand is zero and the compare is a
+  // check of the sign bit, see if we can perform the "gzip trick":
+  // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
+  // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
+  EVT XType = N0.getValueType();
+  EVT AType = N2.getValueType();
+  if (!isNullConstant(N3) || !XType.bitsGE(AType))
+    return SDValue();
+
+  // If the comparison is testing for a positive value, we have to invert
+  // the sign bit mask, so only do that transform if the target has a bitwise
+  // 'and not' instruction (the invert is free).
+  if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
+    // (X > -1) ? A : 0
+    // (X > 0) ? X : 0 <-- This is canonical signed max.
+    if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
+      return SDValue();
+  } else if (CC == ISD::SETLT) {
+    // (X < 0) ? A : 0
+    // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
+    if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
+      return SDValue();
+  } else {
+    return SDValue();
+  }
+
+  // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
+  // constant.
+  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
+  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+  if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
+    unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
+    SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
+    SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
+    AddToWorklist(Shift.getNode());
+
+    if (XType.bitsGT(AType)) {
+      Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+      AddToWorklist(Shift.getNode());
+    }
+
+    if (CC == ISD::SETGT)
+      Shift = DAG.getNOT(DL, Shift, AType);
+
+    return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+  }
+
+  SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
+  SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
+  AddToWorklist(Shift.getNode());
+
+  if (XType.bitsGT(AType)) {
+    Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+    AddToWorklist(Shift.getNode());
+  }
+
+  if (CC == ISD::SETGT)
+    Shift = DAG.getNOT(DL, Shift, AType);
+
+  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+}
+
+/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
+/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+/// in it. This may be a win when the constant is not otherwise available
+/// because it replaces two constant pool loads with one.
+SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
+    const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
+    ISD::CondCode CC) {
+  if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
+    return SDValue();
+
+  // If we are before legalize types, we want the other legalization to happen
+  // first (for example, to avoid messing with soft float).
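+  // Layout note (illustrative): the constant pool array built below is
+  // { FV, TV }, so a true condition selects byte offset EltSize (element 1,
+  // the 'true' FP value) and a false condition selects offset 0 (element 0,
+  // the 'false' FP value).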
+  auto *TV = dyn_cast<ConstantFPSDNode>(N2);
+  auto *FV = dyn_cast<ConstantFPSDNode>(N3);
+  EVT VT = N2.getValueType();
+  if (!TV || !FV || !TLI.isTypeLegal(VT))
+    return SDValue();
+
+  // If a constant can be materialized without loads, this does not make sense.
+  if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
+      TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
+      TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
+    return SDValue();
+
+  // If both constants have multiple uses, then we won't need to do an extra
+  // load. The values are likely around in registers for other users.
+  if (!TV->hasOneUse() && !FV->hasOneUse())
+    return SDValue();
+
+  Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
+                       const_cast<ConstantFP*>(TV->getConstantFPValue()) };
+  Type *FPTy = Elts[0]->getType();
+  const DataLayout &TD = DAG.getDataLayout();
+
+  // Create a ConstantArray of the two constants.
+  Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
+  SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
+                                      TD.getPrefTypeAlignment(FPTy));
+  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+  // Get offsets to the 0 and 1 elements of the array, so we can select between
+  // them.
+  SDValue Zero = DAG.getIntPtrConstant(0, DL);
+  unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+  SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
+  SDValue Cond =
+      DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
+  AddToWorklist(Cond.getNode());
+  SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
+  AddToWorklist(CstOffset.getNode());
+  CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
+  AddToWorklist(CPIdx.getNode());
+  return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+                     MachinePointerInfo::getConstantPool(
+                         DAG.getMachineFunction()), Alignment);
+}
+
+/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// where 'cond' is the comparison specified by CC.
+SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
+                                      SDValue N2, SDValue N3, ISD::CondCode CC,
+                                      bool NotExtCompare) {
+  // (x ? y : y) -> y.
+  if (N2 == N3) return N2;
+
+  EVT CmpOpVT = N0.getValueType();
+  EVT CmpResVT = getSetCCResultType(CmpOpVT);
+  EVT VT = N2.getValueType();
+  auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+  auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
+
+  // Determine if the condition we're dealing with is constant.
+  if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
+    AddToWorklist(SCC.getNode());
+    if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
+      // fold select_cc true, x, y -> x
+      // fold select_cc false, x, y -> y
+      return !(SCCC->isNullValue()) ? N2 : N3;
+    }
+  }
+
+  if (SDValue V =
+          convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
+    return V;
+
+  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
+    return V;
+
+  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
+  // where y has a single bit set.
+  // A plaintext description would be, we can turn the SELECT_CC into an AND
+  // when the condition can be materialized as an all-ones register. Any
+  // single bit-test can be materialized as an all-ones register with
+  // shift-left and shift-right-arith.
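+  // Worked example (illustrative), for i8:
+  //   select_cc seteq (and x, 0x20), 0, 0, A
+  // SHL by 2 moves bit 5 into the sign bit, and SRA by 7 then smears it,
+  // producing 0xFF when the bit was set and 0x00 otherwise - exactly the
+  // all-ones/all-zeros mask that is ANDed with A below.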
+ // TODO: The operation legality checks could be loosened to include "custom", + // but that may cause regressions for targets that do not have shift + // instructions. + if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && + N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) && + TLI.isOperationLegal(ISD::SHL, VT) && + TLI.isOperationLegal(ISD::SRA, VT)) { + SDValue AndLHS = N0->getOperand(0); + auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); + if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { + // Shift the tested bit over the sign bit. + const APInt &AndMask = ConstAndRHS->getAPIntValue(); + SDValue ShlAmt = + DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), + getShiftAmountTy(AndLHS.getValueType())); + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); + + // Now arithmetic right shift it all the way over, so the result is either + // all-ones, or zero. + SDValue ShrAmt = + DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), + getShiftAmountTy(Shl.getValueType())); + SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); + + return DAG.getNode(ISD::AND, DL, VT, Shr, N3); + } + } + + // fold select C, 16, 0 -> shl C, 4 + bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2(); + bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2(); + + if ((Fold || Swap) && + TLI.getBooleanContents(CmpOpVT) == + TargetLowering::ZeroOrOneBooleanContent && + (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) { + + if (Swap) { + CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger()); + std::swap(N2C, N3C); + } + + // If the caller doesn't want us to simplify this into a zext of a compare, + // don't do it. + if (NotExtCompare && N2C->isOne()) + return SDValue(); + + SDValue Temp, SCC; + // zext (setcc n0, n1) + if (LegalTypes) { + SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC); + if (VT.bitsLT(SCC.getValueType())) + Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT); + else + Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC); + } else { + SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); + Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC); + } + + AddToWorklist(SCC.getNode()); + AddToWorklist(Temp.getNode()); + + if (N2C->isOne()) + return Temp; + + // shl setcc result by log2 n2c + return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getConstant(N2C->getAPIntValue().logBase2(), + SDLoc(Temp), + getShiftAmountTy(Temp.getValueType()))); + } + + // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X) + // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X) + // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X) + // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X) + // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X) + // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X) + // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X) + // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X) + if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { + SDValue ValueOnZero = N2; + SDValue Count = N3; + // If the condition is NE instead of E, swap the operands. + if (CC == ISD::SETNE) + std::swap(ValueOnZero, Count); + // Check if the value on zero is a constant equal to the bits in the type. 
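+  // e.g. (illustrative) for i32: select_cc seteq X, 0, 32, (cttz X) becomes
+  // plain (cttz X), because ISD::CTTZ is defined to return the bit width
+  // (here 32) for a zero input, so the select is redundant.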
+ if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) { + if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) { + // If the other operand is cttz/cttz_zero_undef of N0, and cttz is + // legal, combine to just cttz. + if ((Count.getOpcode() == ISD::CTTZ || + Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) && + N0 == Count.getOperand(0) && + (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT))) + return DAG.getNode(ISD::CTTZ, DL, VT, N0); + // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is + // legal, combine to just ctlz. + if ((Count.getOpcode() == ISD::CTLZ || + Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) && + N0 == Count.getOperand(0) && + (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT))) + return DAG.getNode(ISD::CTLZ, DL, VT, N0); + } + } + } + + return SDValue(); +} + +/// This is a stub for TargetLowering::SimplifySetCC. +SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, const SDLoc &DL, + bool foldBooleans) { + TargetLowering::DAGCombinerInfo + DagCombineInfo(DAG, Level, false, this); + return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); +} + +/// Given an ISD::SDIV node expressing a divide by constant, return +/// a DAG expression to select that will generate the same value by multiplying +/// by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". +SDValue DAGCombiner::BuildSDIV(SDNode *N) { + // when optimising for minimum size, we don't want to expand a div to a mul + // and a shift. + if (DAG.getMachineFunction().getFunction().hasMinSize()) + return SDValue(); + + SmallVector<SDNode *, 8> Built; + if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) { + for (SDNode *N : Built) + AddToWorklist(N); + return S; + } + + return SDValue(); +} + +/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a +/// DAG expression that will generate the same value by right shifting. +SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); + if (!C) + return SDValue(); + + // Avoid division by zero. + if (C->isNullValue()) + return SDValue(); + + SmallVector<SDNode *, 8> Built; + if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) { + for (SDNode *N : Built) + AddToWorklist(N); + return S; + } + + return SDValue(); +} + +/// Given an ISD::UDIV node expressing a divide by constant, return a DAG +/// expression that will generate the same value by multiplying by a magic +/// number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". +SDValue DAGCombiner::BuildUDIV(SDNode *N) { + // when optimising for minimum size, we don't want to expand a div to a mul + // and a shift. + if (DAG.getMachineFunction().getFunction().hasMinSize()) + return SDValue(); + + SmallVector<SDNode *, 8> Built; + if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) { + for (SDNode *N : Built) + AddToWorklist(N); + return S; + } + + return SDValue(); +} + +/// Determines the LogBase2 value for a non-null input value using the +/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V). 
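+/// e.g. (illustrative) for i32 and V = 16: ctlz(16) = 27, so
+/// LogBase2 = (32 - 1) - 27 = 4, which matches log2(16).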
+SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { + EVT VT = V.getValueType(); + unsigned EltBits = VT.getScalarSizeInBits(); + SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V); + SDValue Base = DAG.getConstant(EltBits - 1, DL, VT); + SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz); + return LogBase2; +} + +/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) +/// For the reciprocal, we need to find the zero of the function: +/// F(X) = A X - 1 [which has a zero at X = 1/A] +/// => +/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form +/// does not require additional intermediate precision] +/// For the last iteration, put numerator N into it to gain more precision: +/// Result = N X_i + X_i (N - N A X_i) +SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, + SDNodeFlags Flags) { + if (Level >= AfterLegalizeDAG) + return SDValue(); + + // TODO: Handle half and/or extended types? + EVT VT = Op.getValueType(); + if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) + return SDValue(); + + // If estimates are explicitly disabled for this function, we're done. + MachineFunction &MF = DAG.getMachineFunction(); + int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF); + if (Enabled == TLI.ReciprocalEstimate::Disabled) + return SDValue(); + + // Estimates may be explicitly enabled for this type with a custom number of + // refinement steps. + int Iterations = TLI.getDivRefinementSteps(VT, MF); + if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { + AddToWorklist(Est.getNode()); + + SDLoc DL(Op); + if (Iterations) { + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); + + // Newton iterations: Est = Est + Est (N - Arg * Est) + // If this is the last iteration, also multiply by the numerator. + for (int i = 0; i < Iterations; ++i) { + SDValue MulEst = Est; + + if (i == Iterations - 1) { + MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags); + AddToWorklist(MulEst.getNode()); + } + + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FSUB, DL, VT, + (i == Iterations - 1 ? N : FPOne), NewEst, Flags); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); + AddToWorklist(NewEst.getNode()); + + Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags); + AddToWorklist(Est.getNode()); + } + } else { + // If no iterations are available, multiply with N. + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags); + AddToWorklist(Est.getNode()); + } + + return Est; + } + + return SDValue(); +} + +/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) +/// For the reciprocal sqrt, we need to find the zero of the function: +/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] +/// => +/// X_{i+1} = X_i (1.5 - A X_i^2 / 2) +/// As a result, we precompute A/2 prior to the iteration loop. +SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, + unsigned Iterations, + SDNodeFlags Flags, bool Reciprocal) { + EVT VT = Arg.getValueType(); + SDLoc DL(Arg); + SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); + + // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that + // this entire sequence requires only one FP constant. 
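+  // Sanity check (illustrative): for Arg == 8.0 this yields
+  // 1.5 * 8.0 - 8.0 == 4.0 == Arg / 2, so the 1.5 constant used by the
+  // iteration below is the only FP immediate required.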
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); + + // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); + } + + // If non-reciprocal square root is requested, multiply the result by Arg. + if (!Reciprocal) + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); + + return Est; +} + +/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) +/// For the reciprocal sqrt, we need to find the zero of the function: +/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] +/// => +/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) +SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, + unsigned Iterations, + SDNodeFlags Flags, bool Reciprocal) { + EVT VT = Arg.getValueType(); + SDLoc DL(Arg); + SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); + SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT); + + // This routine must enter the loop below to work correctly + // when (Reciprocal == false). + assert(Iterations > 0); + + // Newton iterations for reciprocal square root: + // E = (E * -0.5) * ((A * E) * E + -3.0) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); + SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); + SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); + + // When calculating a square root at the last iteration build: + // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) + // (notice a common subexpression) + SDValue LHS; + if (Reciprocal || (i + 1) < Iterations) { + // RSQRT: LHS = (E * -0.5) + LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); + } else { + // SQRT: LHS = (A * E) * -0.5 + LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); + } + + Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); + } + + return Est; +} + +/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case +/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if +/// Op can be zero. +SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, + bool Reciprocal) { + if (Level >= AfterLegalizeDAG) + return SDValue(); + + // TODO: Handle half and/or extended types? + EVT VT = Op.getValueType(); + if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) + return SDValue(); + + // If estimates are explicitly disabled for this function, we're done. + MachineFunction &MF = DAG.getMachineFunction(); + int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF); + if (Enabled == TLI.ReciprocalEstimate::Disabled) + return SDValue(); + + // Estimates may be explicitly enabled for this type with a custom number of + // refinement steps. + int Iterations = TLI.getSqrtRefinementSteps(VT, MF); + + bool UseOneConstNR = false; + if (SDValue Est = + TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR, + Reciprocal)) { + AddToWorklist(Est.getNode()); + + if (Iterations) { + Est = UseOneConstNR + ? 
buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
+                : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+
+      if (!Reciprocal) {
+        // The estimate is now completely wrong if the input was exactly 0.0 or
+        // possibly a denormal. Force the answer to 0.0 for those cases.
+        SDLoc DL(Op);
+        EVT CCVT = getSetCCResultType(VT);
+        ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
+        const Function &F = DAG.getMachineFunction().getFunction();
+        Attribute Denorms = F.getFnAttribute("denormal-fp-math");
+        if (Denorms.getValueAsString().equals("ieee")) {
+          // fabs(X) < SmallestNormal ? 0.0 : Est
+          const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+          APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
+          SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
+          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+          SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
+          SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
+          Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
+        } else {
+          // X == 0.0 ? 0.0 : Est
+          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+          SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
+          Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
+        }
+      }
+    }
+    return Est;
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
+  return buildSqrtEstimateImpl(Op, Flags, true);
+}
+
+SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
+  return buildSqrtEstimateImpl(Op, Flags, false);
+}
+
+/// Return true if there is any possibility that the two addresses overlap.
+bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
+
+  struct MemUseCharacteristics {
+    bool IsVolatile;
+    bool IsAtomic;
+    SDValue BasePtr;
+    int64_t Offset;
+    Optional<int64_t> NumBytes;
+    MachineMemOperand *MMO;
+  };
+
+  auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
+    if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
+      int64_t Offset = 0;
+      if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
+        Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
+                     ? C->getSExtValue()
+                     : (LSN->getAddressingMode() == ISD::PRE_DEC)
+                           ? -1 * C->getSExtValue()
+                           : 0;
+      return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
+              Offset /*base offset*/,
+              Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
+              LSN->getMemOperand()};
+    }
+    if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
+      return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
+              (LN->hasOffset()) ? LN->getOffset() : 0,
+              (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
+                                : Optional<int64_t>(),
+              (MachineMemOperand *)nullptr};
+    // Default.
+    return {false /*isVolatile*/, /*isAtomic*/ false, SDValue(),
+            (int64_t)0 /*offset*/,
+            Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
+  };
+
+  MemUseCharacteristics MUC0 = getCharacteristics(Op0),
+                        MUC1 = getCharacteristics(Op1);
+
+  // If they are to the same address, then they must be aliases.
+  if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
+      MUC0.Offset == MUC1.Offset)
+    return true;
+
+  // If they are both volatile then they cannot be reordered.
+ if (MUC0.IsVolatile && MUC1.IsVolatile) + return true; + + // Be conservative about atomics for the moment + // TODO: This is way overconservative for unordered atomics (see D66309) + if (MUC0.IsAtomic && MUC1.IsAtomic) + return true; + + if (MUC0.MMO && MUC1.MMO) { + if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) || + (MUC1.MMO->isInvariant() && MUC0.MMO->isStore())) + return false; + } + + // Try to prove that there is aliasing, or that there is no aliasing. Either + // way, we can return now. If nothing can be proved, proceed with more tests. + bool IsAlias; + if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes, + DAG, IsAlias)) + return IsAlias; + + // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if + // either are not known. + if (!MUC0.MMO || !MUC1.MMO) + return true; + + // If one operation reads from invariant memory, and the other may store, they + // cannot alias. These should really be checking the equivalent of mayWrite, + // but it only matters for memory nodes other than load /store. + if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) || + (MUC1.MMO->isInvariant() && MUC0.MMO->isStore())) + return false; + + // If we know required SrcValue1 and SrcValue2 have relatively large + // alignment compared to the size and offset of the access, we may be able + // to prove they do not alias. This check is conservative for now to catch + // cases created by splitting vector types. + int64_t SrcValOffset0 = MUC0.MMO->getOffset(); + int64_t SrcValOffset1 = MUC1.MMO->getOffset(); + unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment(); + unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment(); + if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && + MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() && + *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) { + int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; + int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; + + // There is no overlap between these relatively aligned accesses of + // similar size. Return no alias. + if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 || + (OffAlign1 + *MUC1.NumBytes) <= OffAlign0) + return false; + } + + bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 + ? CombinerGlobalAA + : DAG.getSubtarget().useAA(); +#ifndef NDEBUG + if (CombinerAAOnlyFunc.getNumOccurrences() && + CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) + UseAA = false; +#endif + + if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) { + // Use alias analysis information. + int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); + int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset; + int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset; + AliasResult AAResult = AA->alias( + MemoryLocation(MUC0.MMO->getValue(), Overlap0, + UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()), + MemoryLocation(MUC1.MMO->getValue(), Overlap1, + UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())); + if (AAResult == NoAlias) + return false; + } + + // Otherwise we have to assume they alias. + return true; +} + +/// Walk up chain skipping non-aliasing memory nodes, +/// looking for aliasing nodes and adding them to the Aliases vector. +void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, + SmallVectorImpl<SDValue> &Aliases) { + SmallVector<SDValue, 8> Chains; // List of chains to visit. + SmallPtrSet<SDNode *, 16> Visited; // Visited node set. + + // Get alias information for node. 
+  // TODO: relax aliasing for unordered atomics (see D66309)
+  const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
+
+  // Starting off.
+  Chains.push_back(OriginalChain);
+  unsigned Depth = 0;
+
+  // Attempt to improve chain by a single step.
+  std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
+    switch (C.getOpcode()) {
+    case ISD::EntryToken:
+      // No need to mark EntryToken.
+      C = SDValue();
+      return true;
+    case ISD::LOAD:
+    case ISD::STORE: {
+      // Get alias information for C.
+      // TODO: Relax aliasing for unordered atomics (see D66309)
+      bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
+                      cast<LSBaseSDNode>(C.getNode())->isSimple();
+      if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
+        // Look further up the chain.
+        C = C.getOperand(0);
+        return true;
+      }
+      // Alias, so stop here.
+      return false;
+    }
+
+    case ISD::CopyFromReg:
+      // Always forward past CopyFromReg.
+      C = C.getOperand(0);
+      return true;
+
+    case ISD::LIFETIME_START:
+    case ISD::LIFETIME_END: {
+      // We can forward past any lifetime start/end that can be proven not to
+      // alias the memory access.
+      if (!isAlias(N, C.getNode())) {
+        // Look further up the chain.
+        C = C.getOperand(0);
+        return true;
+      }
+      return false;
+    }
+    default:
+      return false;
+    }
+  };
+
+  // Look at each chain and determine if it is an alias. If so, add it to the
+  // aliases list. If not, then continue up the chain looking for the next
+  // candidate.
+  while (!Chains.empty()) {
+    SDValue Chain = Chains.pop_back_val();
+
+    // Don't bother if we've seen Chain before.
+    if (!Visited.insert(Chain.getNode()).second)
+      continue;
+
+    // For TokenFactor nodes, look at each operand and only continue up the
+    // chain until we reach the depth limit.
+    //
+    // FIXME: The depth check could be made to return the last non-aliasing
+    // chain we found before we hit a tokenfactor rather than the original
+    // chain.
+    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
+      Aliases.clear();
+      Aliases.push_back(OriginalChain);
+      return;
+    }
+
+    if (Chain.getOpcode() == ISD::TokenFactor) {
+      // We have to check each of the operands of the token factor for "small"
+      // token factors, so we queue them up. Adding the operands to the queue
+      // (stack) in reverse order maintains the original order and increases the
+      // likelihood that getNode will find a matching token factor (CSE).
+      if (Chain.getNumOperands() > 16) {
+        Aliases.push_back(Chain);
+        continue;
+      }
+      for (unsigned n = Chain.getNumOperands(); n;)
+        Chains.push_back(Chain.getOperand(--n));
+      ++Depth;
+      continue;
+    }
+    // Everything else.
+    if (ImproveChain(Chain)) {
+      // Updated chain found; consider the new chain if one exists.
+      if (Chain.getNode())
+        Chains.push_back(Chain);
+      ++Depth;
+      continue;
+    }
+    // No improved chain is possible, so treat this chain as an alias.
+    Aliases.push_back(Chain);
+  }
+}
+
+/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
+/// (aliasing node.)
+SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
+  if (OptLevel == CodeGenOpt::None)
+    return OldChain;
+
+  // Ops for replacing token factor.
+  SmallVector<SDValue, 8> Aliases;
+
+  // Accumulate all the aliases to this node.
+  GatherAllAliases(N, OldChain, Aliases);
+
+  // If no operands then chain to entry token.
+  if (Aliases.size() == 0)
+    return DAG.getEntryNode();
+
+  // If a single operand then chain to it. We don't need to revisit it.
+  if (Aliases.size() == 1)
+    return Aliases[0];
+
+  // Construct a custom tailored token factor.
+  return DAG.getTokenFactor(SDLoc(N), Aliases);
+}
+
+namespace {
+// TODO: Replace with std::monostate when we move to C++17.
+struct UnitT { } Unit;
+bool operator==(const UnitT &, const UnitT &) { return true; }
+bool operator!=(const UnitT &, const UnitT &) { return false; }
+} // namespace
+
+// This function tries to collect a bunch of potentially interesting
+// nodes to improve the chains of, all at once. This might seem
+// redundant, as this function gets called when visiting every store
+// node, so why not let the work be done on each store as it's visited?
+//
+// I believe this is mainly important because MergeConsecutiveStores
+// is unable to deal with merging stores of different sizes, so unless
+// we improve the chains of all the potential candidates up-front
+// before running MergeConsecutiveStores, it might only see some of
+// the nodes that will eventually be candidates, and then not be able
+// to go from a partially-merged state to the desired final
+// fully-merged state.
+
+bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
+  SmallVector<StoreSDNode *, 8> ChainedStores;
+  StoreSDNode *STChain = St;
+  // Intervals records which offsets from BaseIndex have been covered. In
+  // the common case, every store writes to the address immediately preceding
+  // the last one, and is thus merged with the previous interval at insertion
+  // time.
+
+  using IMap =
+      llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
+  IMap::Allocator A;
+  IMap Intervals(A);
+
+  // This holds the base pointer, index, and the offset in bytes from the base
+  // pointer.
+  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
+
+  // We must have a base and an offset.
+  if (!BasePtr.getBase().getNode())
+    return false;
+
+  // Do not handle stores to undef base pointers.
+  if (BasePtr.getBase().isUndef())
+    return false;
+
+  // Add ST's interval.
+  Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
+
+  while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
+    // If the chain has more than one use, then we can't reorder the mem ops.
+    if (!SDValue(Chain, 0)->hasOneUse())
+      break;
+    // TODO: Relax for unordered atomics (see D66309)
+    if (!Chain->isSimple() || Chain->isIndexed())
+      break;
+
+    // Find the base pointer and offset for this memory node.
+    const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
+    // Check that the base pointer is the same as the original one.
+    int64_t Offset;
+    if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
+      break;
+    int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
+    // Make sure we don't overlap with other intervals by checking the ones to
+    // the left or right before inserting.
+    auto I = Intervals.find(Offset);
+    // If there's a next interval, we should end before it.
+    if (I != Intervals.end() && I.start() < (Offset + Length))
+      break;
+    // If there's a previous interval, we should start after it.
+    if (I != Intervals.begin() && (--I).stop() <= Offset)
+      break;
+    Intervals.insert(Offset, Offset + Length, Unit);
+
+    ChainedStores.push_back(Chain);
+    STChain = Chain;
+  }
+
+  // If we didn't find a chained store, exit.
+  if (ChainedStores.size() == 0)
+    return false;
+
+  // Improve all chained stores (St and ChainedStores members) starting from
+  // where the store chain ended and return a single TokenFactor.
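+  // Shape of the rewrite (illustrative): a serial chain
+  //   St -> st2 -> st1 -> root
+  // becomes three independent stores that all take 'root' (or a better
+  // chain found for each) as their input chain, joined for downstream
+  // users by the single TokenFactor built below.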
+ SDValue NewChain = STChain->getChain(); + SmallVector<SDValue, 8> TFOps; + for (unsigned I = ChainedStores.size(); I;) { + StoreSDNode *S = ChainedStores[--I]; + SDValue BetterChain = FindBetterChain(S, NewChain); + S = cast<StoreSDNode>(DAG.UpdateNodeOperands( + S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3))); + TFOps.push_back(SDValue(S, 0)); + ChainedStores[I] = S; + } + + // Improve St's chain. Use a new node to avoid creating a loop from CombineTo. + SDValue BetterChain = FindBetterChain(St, NewChain); + SDValue NewST; + if (St->isTruncatingStore()) + NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(), + St->getBasePtr(), St->getMemoryVT(), + St->getMemOperand()); + else + NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(), + St->getBasePtr(), St->getMemOperand()); + + TFOps.push_back(NewST); + + // If we improved every element of TFOps, then we've lost the dependence on + // NewChain to successors of St and we need to add it back to TFOps. Do so at + // the beginning to keep relative order consistent with FindBetterChains. + auto hasImprovedChain = [&](SDValue ST) -> bool { + return ST->getOperand(0) != NewChain; + }; + bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain); + if (AddNewChain) + TFOps.insert(TFOps.begin(), NewChain); + + SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps); + CombineTo(St, TF); + + // Add TF and its operands to the worklist. + AddToWorklist(TF.getNode()); + for (const SDValue &Op : TF->ops()) + AddToWorklist(Op.getNode()); + AddToWorklist(STChain); + return true; +} + +bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { + if (OptLevel == CodeGenOpt::None) + return false; + + const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); + + // We must have a base and an offset. + if (!BasePtr.getBase().getNode()) + return false; + + // Do not handle stores to undef base pointers. + if (BasePtr.getBase().isUndef()) + return false; + + // Directly improve a chain of disjoint stores starting at St. + if (parallelizeChainedStores(St)) + return true; + + // Improve St's Chain.. + SDValue BetterChain = FindBetterChain(St, St->getChain()); + if (St->getChain() != BetterChain) { + replaceStoreChain(St, BetterChain); + return true; + } + return false; +} + +/// This is the entry point for the file. +void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA, + CodeGenOpt::Level OptLevel) { + /// This is the main entry point to this class. + DAGCombiner(*this, AA, OptLevel).Run(Level); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp new file mode 100644 index 0000000000000..6d7260d7aee5c --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -0,0 +1,2474 @@ +//===- FastISel.cpp - Implementation of the FastISel class ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the FastISel class. +// +// "Fast" instruction selection is designed to emit very poor code quickly. +// Also, it is not designed to be able to do much lowering, so most illegal +// types (e.g. i64 on 32-bit targets) and operations are not supported. 
It is +// also not intended to be able to do much optimization, except in a few cases +// where doing optimizations reduces overall compile time. For example, folding +// constants into immediate fields is often done, because it's cheap and it +// reduces the number of instructions later phases have to examine. +// +// "Fast" instruction selection is able to fail gracefully and transfer +// control to the SelectionDAG selector for operations that it doesn't +// support. In many cases, this allows us to avoid duplicating a lot of +// the complicated lowering logic that SelectionDAG currently has. +// +// The intended use for "fast" instruction selection is "-O0" mode +// compilation, where the quality of the generated code is irrelevant when +// weighed against the speed at which the code can be generated. Also, +// at -O0, the LLVM optimizers are not running, and this makes the +// compile time of codegen a much higher portion of the overall compile +// time. Despite its limitations, "fast" instruction selection is able to +// handle enough code on its own to provide noticeable overall speedups +// in -O0 compiles. +// +// Basic operations are supported in a target-independent way, by reading +// the same instruction descriptions that the SelectionDAG selector reads, +// and identifying simple arithmetic operations that can be directly selected +// from simple operators. More complicated operations currently require +// target-specific code. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/FastISel.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCContext.h" 
+#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <utility> + +using namespace llvm; +using namespace PatternMatch; + +#define DEBUG_TYPE "isel" + +// FIXME: Remove this after the feature has proven reliable. +static cl::opt<bool> SinkLocalValues("fast-isel-sink-local-values", + cl::init(true), cl::Hidden, + cl::desc("Sink local values in FastISel")); + +STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " + "target-independent selector"); +STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " + "target-specific selector"); +STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); + +/// Set the current block to which generated machine instructions will be +/// appended. +void FastISel::startNewBlock() { + assert(LocalValueMap.empty() && + "local values should be cleared after finishing a BB"); + + // Instructions are appended to FuncInfo.MBB. If the basic block already + // contains labels or copies, use the last instruction as the last local + // value. + EmitStartPt = nullptr; + if (!FuncInfo.MBB->empty()) + EmitStartPt = &FuncInfo.MBB->back(); + LastLocalValue = EmitStartPt; +} + +/// Flush the local CSE map and sink anything we can. +void FastISel::finishBasicBlock() { flushLocalValueMap(); } + +bool FastISel::lowerArguments() { + if (!FuncInfo.CanLowerReturn) + // Fallback to SDISel argument lowering code to deal with sret pointer + // parameter. + return false; + + if (!fastLowerArguments()) + return false; + + // Enter arguments into ValueMap for uses in non-entry BBs. + for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), + E = FuncInfo.Fn->arg_end(); + I != E; ++I) { + DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(&*I); + assert(VI != LocalValueMap.end() && "Missed an argument?"); + FuncInfo.ValueMap[&*I] = VI->second; + } + return true; +} + +/// Return the defined register if this instruction defines exactly one +/// virtual register and uses no other virtual registers. Otherwise return 0. +static unsigned findSinkableLocalRegDef(MachineInstr &MI) { + unsigned RegDef = 0; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + if (MO.isDef()) { + if (RegDef) + return 0; + RegDef = MO.getReg(); + } else if (Register::isVirtualRegister(MO.getReg())) { + // This is another use of a vreg. Don't try to sink it. + return 0; + } + } + return RegDef; +} + +void FastISel::flushLocalValueMap() { + // Try to sink local values down to their first use so that we can give them a + // better debug location. This has the side effect of shrinking local value + // live ranges, which helps out fast regalloc. + if (SinkLocalValues && LastLocalValue != EmitStartPt) { + // Sink local value materialization instructions between EmitStartPt and + // LastLocalValue. Visit them bottom-up, starting from LastLocalValue, to + // avoid inserting into the range that we're iterating over. + MachineBasicBlock::reverse_iterator RE = + EmitStartPt ? 
MachineBasicBlock::reverse_iterator(EmitStartPt)
+                    : FuncInfo.MBB->rend();
+    MachineBasicBlock::reverse_iterator RI(LastLocalValue);
+
+    InstOrderMap OrderMap;
+    for (; RI != RE;) {
+      MachineInstr &LocalMI = *RI;
+      ++RI;
+      bool Store = true;
+      if (!LocalMI.isSafeToMove(nullptr, Store))
+        continue;
+      unsigned DefReg = findSinkableLocalRegDef(LocalMI);
+      if (DefReg == 0)
+        continue;
+
+      sinkLocalValueMaterialization(LocalMI, DefReg, OrderMap);
+    }
+  }
+
+  LocalValueMap.clear();
+  LastLocalValue = EmitStartPt;
+  recomputeInsertPt();
+  SavedInsertPt = FuncInfo.InsertPt;
+  LastFlushPoint = FuncInfo.InsertPt;
+}
+
+static bool isRegUsedByPhiNodes(unsigned DefReg,
+                                FunctionLoweringInfo &FuncInfo) {
+  for (auto &P : FuncInfo.PHINodesToUpdate)
+    if (P.second == DefReg)
+      return true;
+  return false;
+}
+
+/// Build a map of instruction orders. Record the first terminator and its
+/// order. Consider EH_LABEL instructions to be terminators as well, since local
+/// values for phis after invokes must be materialized before the call.
+void FastISel::InstOrderMap::initialize(
+    MachineBasicBlock *MBB, MachineBasicBlock::iterator LastFlushPoint) {
+  unsigned Order = 0;
+  for (MachineInstr &I : *MBB) {
+    if (!FirstTerminator &&
+        (I.isTerminator() || (I.isEHLabel() && &I != &MBB->front()))) {
+      FirstTerminator = &I;
+      FirstTerminatorOrder = Order;
+    }
+    Orders[&I] = Order++;
+
+    // We don't need to order instructions past the last flush point.
+    if (I.getIterator() == LastFlushPoint)
+      break;
+  }
+}
+
+void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI,
+                                             unsigned DefReg,
+                                             InstOrderMap &OrderMap) {
+  // If this register is used by a register fixup, MRI will not contain all
+  // the uses until after register fixups, so don't attempt to sink or DCE
+  // this instruction. Register fixups typically come from no-op cast
+  // instructions, which replace the cast instruction vreg with the local
+  // value vreg.
+  if (FuncInfo.RegsWithFixups.count(DefReg))
+    return;
+
+  // We can DCE this instruction if there are no uses and it wasn't
+  // materialized for a successor PHI node.
+  bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo);
+  if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) {
+    if (EmitStartPt == &LocalMI)
+      EmitStartPt = EmitStartPt->getPrevNode();
+    LLVM_DEBUG(dbgs() << "removing dead local value materialization "
+                      << LocalMI);
+    OrderMap.Orders.erase(&LocalMI);
+    LocalMI.eraseFromParent();
+    return;
+  }
+
+  // Number the instructions if we haven't yet so we can efficiently find the
+  // earliest use.
+  if (OrderMap.Orders.empty())
+    OrderMap.initialize(FuncInfo.MBB, LastFlushPoint);
+
+  // Find the first user in the BB.
+  MachineInstr *FirstUser = nullptr;
+  unsigned FirstOrder = std::numeric_limits<unsigned>::max();
+  for (MachineInstr &UseInst : MRI.use_nodbg_instructions(DefReg)) {
+    auto I = OrderMap.Orders.find(&UseInst);
+    assert(I != OrderMap.Orders.end() &&
+           "local value used by instruction outside local region");
+    unsigned UseOrder = I->second;
+    if (UseOrder < FirstOrder) {
+      FirstOrder = UseOrder;
+      FirstUser = &UseInst;
+    }
+  }
+
+  // The insertion point will be the first terminator or the first user,
+  // whichever came first. If there was no terminator, this must be a
+  // fallthrough block and the insertion point is the end of the block.
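+  // For example (hypothetical): a constant materialized at the top of the
+  // block whose first user is several instructions down is re-inserted just
+  // before that user; but if the value also feeds a successor PHI and a
+  // terminator (or EH_LABEL) comes first, it must stay above the terminator.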
+  MachineBasicBlock::instr_iterator SinkPos;
+  if (UsedByPHI && OrderMap.FirstTerminatorOrder < FirstOrder) {
+    FirstOrder = OrderMap.FirstTerminatorOrder;
+    SinkPos = OrderMap.FirstTerminator->getIterator();
+  } else if (FirstUser) {
+    SinkPos = FirstUser->getIterator();
+  } else {
+    assert(UsedByPHI && "must be users if not used by a phi");
+    SinkPos = FuncInfo.MBB->instr_end();
+  }
+
+  // Collect all DBG_VALUEs before the new insertion position so that we can
+  // sink them.
+  SmallVector<MachineInstr *, 1> DbgValues;
+  for (MachineInstr &DbgVal : MRI.use_instructions(DefReg)) {
+    if (!DbgVal.isDebugValue())
+      continue;
+    unsigned UseOrder = OrderMap.Orders[&DbgVal];
+    if (UseOrder < FirstOrder)
+      DbgValues.push_back(&DbgVal);
+  }
+
+  // Sink LocalMI before SinkPos and assign it the same DebugLoc.
+  LLVM_DEBUG(dbgs() << "sinking local value to first use " << LocalMI);
+  FuncInfo.MBB->remove(&LocalMI);
+  FuncInfo.MBB->insert(SinkPos, &LocalMI);
+  if (SinkPos != FuncInfo.MBB->end())
+    LocalMI.setDebugLoc(SinkPos->getDebugLoc());
+
+  // Sink any debug values that we've collected.
+  for (MachineInstr *DI : DbgValues) {
+    FuncInfo.MBB->remove(DI);
+    FuncInfo.MBB->insert(SinkPos, DI);
+  }
+}
+
+bool FastISel::hasTrivialKill(const Value *V) {
+  // Don't consider constants or arguments to have trivial kills.
+  const Instruction *I = dyn_cast<Instruction>(V);
+  if (!I)
+    return false;
+
+  // No-op casts are trivially coalesced by fast-isel.
+  if (const auto *Cast = dyn_cast<CastInst>(I))
+    if (Cast->isNoopCast(DL) && !hasTrivialKill(Cast->getOperand(0)))
+      return false;
+
+  // Even if the value has only one use in the LLVM IR, it is possible that
+  // FastISel has folded the use into another instruction, so that there is
+  // now more than one use at the Machine Instruction level.
+  unsigned Reg = lookUpRegForValue(V);
+  if (Reg && !MRI.use_empty(Reg))
+    return false;
+
+  // GEPs with all zero indices are trivially coalesced by fast-isel.
+  if (const auto *GEP = dyn_cast<GetElementPtrInst>(I))
+    if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
+      return false;
+
+  // Only instructions with a single use in the same basic block are considered
+  // to have trivial kills.
+  return I->hasOneUse() &&
+         !(I->getOpcode() == Instruction::BitCast ||
+           I->getOpcode() == Instruction::PtrToInt ||
+           I->getOpcode() == Instruction::IntToPtr) &&
+         cast<Instruction>(*I->user_begin())->getParent() == I->getParent();
+}
+
+unsigned FastISel::getRegForValue(const Value *V) {
+  EVT RealVT = TLI.getValueType(DL, V->getType(), /*AllowUnknown=*/true);
+  // Don't handle non-simple values in FastISel.
+  if (!RealVT.isSimple())
+    return 0;
+
+  // Ignore illegal types. We must do this before looking up the value
+  // in ValueMap because Arguments are given virtual registers regardless
+  // of whether FastISel can handle them.
+  MVT VT = RealVT.getSimpleVT();
+  if (!TLI.isTypeLegal(VT)) {
+    // Handle integer promotions, though, because they're common and easy.
+    if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+      VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
+    else
+      return 0;
+  }
+
+  // Look up the value to see if we already have a register for it.
+  unsigned Reg = lookUpRegForValue(V);
+  if (Reg)
+    return Reg;
+
+  // In bottom-up mode, just create the virtual register which will be used
+  // to hold the value. It will be materialized later.
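+  // For instance (illustrative): selecting bottom-up, a use of "%x" is
+  // typically visited before the instruction defining "%x"; handing back a
+  // fresh vreg here lets the definition target that vreg when it is
+  // selected later.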
+ if (isa<Instruction>(V) && + (!isa<AllocaInst>(V) || + !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V)))) + return FuncInfo.InitializeRegForValue(V); + + SavePoint SaveInsertPt = enterLocalValueArea(); + + // Materialize the value in a register. Emit any instructions in the + // local value area. + Reg = materializeRegForValue(V, VT); + + leaveLocalValueArea(SaveInsertPt); + + return Reg; +} + +unsigned FastISel::materializeConstant(const Value *V, MVT VT) { + unsigned Reg = 0; + if (const auto *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getValue().getActiveBits() <= 64) + Reg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); + } else if (isa<AllocaInst>(V)) + Reg = fastMaterializeAlloca(cast<AllocaInst>(V)); + else if (isa<ConstantPointerNull>(V)) + // Translate this as an integer zero so that it can be + // local-CSE'd with actual integer zeros. + Reg = getRegForValue( + Constant::getNullValue(DL.getIntPtrType(V->getContext()))); + else if (const auto *CF = dyn_cast<ConstantFP>(V)) { + if (CF->isNullValue()) + Reg = fastMaterializeFloatZero(CF); + else + // Try to emit the constant directly. + Reg = fastEmit_f(VT, VT, ISD::ConstantFP, CF); + + if (!Reg) { + // Try to emit the constant by using an integer constant with a cast. + const APFloat &Flt = CF->getValueAPF(); + EVT IntVT = TLI.getPointerTy(DL); + uint32_t IntBitWidth = IntVT.getSizeInBits(); + APSInt SIntVal(IntBitWidth, /*isUnsigned=*/false); + bool isExact; + (void)Flt.convertToInteger(SIntVal, APFloat::rmTowardZero, &isExact); + if (isExact) { + unsigned IntegerReg = + getRegForValue(ConstantInt::get(V->getContext(), SIntVal)); + if (IntegerReg != 0) + Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg, + /*Kill=*/false); + } + } + } else if (const auto *Op = dyn_cast<Operator>(V)) { + if (!selectOperator(Op, Op->getOpcode())) + if (!isa<Instruction>(Op) || + !fastSelectInstruction(cast<Instruction>(Op))) + return 0; + Reg = lookUpRegForValue(Op); + } else if (isa<UndefValue>(V)) { + Reg = createResultReg(TLI.getRegClassFor(VT)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::IMPLICIT_DEF), Reg); + } + return Reg; +} + +/// Helper for getRegForValue. This function is called when the value isn't +/// already available in a register and must be materialized with new +/// instructions. +unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { + unsigned Reg = 0; + // Give the target-specific code a try first. + if (isa<Constant>(V)) + Reg = fastMaterializeConstant(cast<Constant>(V)); + + // If target-specific code couldn't or didn't want to handle the value, then + // give target-independent code a try. + if (!Reg) + Reg = materializeConstant(V, VT); + + // Don't cache constant materializations in the general ValueMap. + // To do so would require tracking what uses they dominate. + if (Reg) { + LocalValueMap[V] = Reg; + LastLocalValue = MRI.getVRegDef(Reg); + } + return Reg; +} + +unsigned FastISel::lookUpRegForValue(const Value *V) { + // Look up the value to see if we already have a register for it. We + // cache values defined by Instructions across blocks, and other values + // only locally. This is because Instructions already have the SSA + // def-dominates-use requirement enforced. 
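+  // Concretely (illustrative): an Instruction's vreg in FuncInfo.ValueMap is
+  // reused from any block, while a constant materialized for this block
+  // lives only in LocalValueMap and is re-materialized in other blocks.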
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); + if (I != FuncInfo.ValueMap.end()) + return I->second; + return LocalValueMap[V]; +} + +void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { + if (!isa<Instruction>(I)) { + LocalValueMap[I] = Reg; + return; + } + + unsigned &AssignedReg = FuncInfo.ValueMap[I]; + if (AssignedReg == 0) + // Use the new register. + AssignedReg = Reg; + else if (Reg != AssignedReg) { + // Arrange for uses of AssignedReg to be replaced by uses of Reg. + for (unsigned i = 0; i < NumRegs; i++) { + FuncInfo.RegFixups[AssignedReg + i] = Reg + i; + FuncInfo.RegsWithFixups.insert(Reg + i); + } + + AssignedReg = Reg; + } +} + +std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { + unsigned IdxN = getRegForValue(Idx); + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return std::pair<unsigned, bool>(0, false); + + bool IdxNIsKill = hasTrivialKill(Idx); + + // If the index is smaller or larger than intptr_t, truncate or extend it. + MVT PtrVT = TLI.getPointerTy(DL); + EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); + if (IdxVT.bitsLT(PtrVT)) { + IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN, + IdxNIsKill); + IdxNIsKill = true; + } else if (IdxVT.bitsGT(PtrVT)) { + IdxN = + fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill); + IdxNIsKill = true; + } + return std::pair<unsigned, bool>(IdxN, IdxNIsKill); +} + +void FastISel::recomputeInsertPt() { + if (getLastLocalValue()) { + FuncInfo.InsertPt = getLastLocalValue(); + FuncInfo.MBB = FuncInfo.InsertPt->getParent(); + ++FuncInfo.InsertPt; + } else + FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI(); + + // Now skip past any EH_LABELs, which must remain at the beginning. + while (FuncInfo.InsertPt != FuncInfo.MBB->end() && + FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL) + ++FuncInfo.InsertPt; +} + +void FastISel::removeDeadCode(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E) { + assert(I.isValid() && E.isValid() && std::distance(I, E) > 0 && + "Invalid iterator!"); + while (I != E) { + if (LastFlushPoint == I) + LastFlushPoint = E; + if (SavedInsertPt == I) + SavedInsertPt = E; + if (EmitStartPt == I) + EmitStartPt = E.isValid() ? &*E : nullptr; + if (LastLocalValue == I) + LastLocalValue = E.isValid() ? &*E : nullptr; + + MachineInstr *Dead = &*I; + ++I; + Dead->eraseFromParent(); + ++NumFastIselDead; + } + recomputeInsertPt(); +} + +FastISel::SavePoint FastISel::enterLocalValueArea() { + MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; + DebugLoc OldDL = DbgLoc; + recomputeInsertPt(); + DbgLoc = DebugLoc(); + SavePoint SP = {OldInsertPt, OldDL}; + return SP; +} + +void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) { + if (FuncInfo.InsertPt != FuncInfo.MBB->begin()) + LastLocalValue = &*std::prev(FuncInfo.InsertPt); + + // Restore the previous insert position. + FuncInfo.InsertPt = OldInsertPt.InsertPt; + DbgLoc = OldInsertPt.DL; +} + +bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { + EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true); + if (VT == MVT::Other || !VT.isSimple()) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // We only handle legal types. For example, on x86-32 the instruction + // selector contains all of the 64-bit instructions from x86-64, + // under the assumption that i64 won't be used if the target doesn't + // support it. 
+ if (!TLI.isTypeLegal(VT)) { + // MVT::i1 is special. Allow AND, OR, or XOR because they + // don't require additional zeroing, which makes them easy. + if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || + ISDOpcode == ISD::XOR)) + VT = TLI.getTypeToTransformTo(I->getContext(), VT); + else + return false; + } + + // Check if the first operand is a constant, and handle it as "ri". At -O0, + // we don't have anything that canonicalizes operand order. + if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(0))) + if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) { + unsigned Op1 = getRegForValue(I->getOperand(1)); + if (!Op1) + return false; + bool Op1IsKill = hasTrivialKill(I->getOperand(1)); + + unsigned ResultReg = + fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill, + CI->getZExtValue(), VT.getSimpleVT()); + if (!ResultReg) + return false; + + // We successfully emitted code for the given LLVM Instruction. + updateValueMap(I, ResultReg); + return true; + } + + unsigned Op0 = getRegForValue(I->getOperand(0)); + if (!Op0) // Unhandled operand. Halt "fast" selection and bail. + return false; + bool Op0IsKill = hasTrivialKill(I->getOperand(0)); + + // Check if the second operand is a constant and handle it appropriately. + if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + uint64_t Imm = CI->getSExtValue(); + + // Transform "sdiv exact X, 8" -> "sra X, 3". + if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) && + cast<BinaryOperator>(I)->isExact() && isPowerOf2_64(Imm)) { + Imm = Log2_64(Imm); + ISDOpcode = ISD::SRA; + } + + // Transform "urem x, pow2" -> "and x, pow2-1". + if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) && + isPowerOf2_64(Imm)) { + --Imm; + ISDOpcode = ISD::AND; + } + + unsigned ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, + Op0IsKill, Imm, VT.getSimpleVT()); + if (!ResultReg) + return false; + + // We successfully emitted code for the given LLVM Instruction. + updateValueMap(I, ResultReg); + return true; + } + + unsigned Op1 = getRegForValue(I->getOperand(1)); + if (!Op1) // Unhandled operand. Halt "fast" selection and bail. + return false; + bool Op1IsKill = hasTrivialKill(I->getOperand(1)); + + // Now we have both operands in registers. Emit the instruction. + unsigned ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill); + if (!ResultReg) + // Target-specific code wasn't able to find a machine opcode for + // the given ISD opcode and type. Halt "fast" selection and bail. + return false; + + // We successfully emitted code for the given LLVM Instruction. + updateValueMap(I, ResultReg); + return true; +} + +bool FastISel::selectGetElementPtr(const User *I) { + unsigned N = getRegForValue(I->getOperand(0)); + if (!N) // Unhandled operand. Halt "fast" selection and bail. + return false; + bool NIsKill = hasTrivialKill(I->getOperand(0)); + + // Keep a running tab of the total offset to coalesce multiple N = N + Offset + // into a single N = N + TotalOffset. + uint64_t TotalOffs = 0; + // FIXME: What's a good SWAG number for MaxOffs? 
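+  // (Illustrative) MaxOffs bounds how much constant offset is folded into a
+  // single ADD; e.g. a GEP whose constant field and array offsets sum to
+  // less than MaxOffs bytes is emitted as one ADD of the accumulated total
+  // rather than one ADD per index.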
+ uint64_t MaxOffs = 2048; + MVT VT = TLI.getPointerTy(DL); + for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I); + GTI != E; ++GTI) { + const Value *Idx = GTI.getOperand(); + if (StructType *StTy = GTI.getStructTypeOrNull()) { + uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue(); + if (Field) { + // N = N + Offset + TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); + if (TotalOffs >= MaxOffs) { + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } + } + } else { + Type *Ty = GTI.getIndexedType(); + + // If this is a constant subscript, handle it quickly. + if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { + if (CI->isZero()) + continue; + // N = N + Offset + uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue(); + TotalOffs += DL.getTypeAllocSize(Ty) * IdxN; + if (TotalOffs >= MaxOffs) { + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } + continue; + } + if (TotalOffs) { + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } + + // N = N + Idx * ElementSize; + uint64_t ElementSize = DL.getTypeAllocSize(Ty); + std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); + unsigned IdxN = Pair.first; + bool IdxNIsKill = Pair.second; + if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. + return false; + + if (ElementSize != 1) { + IdxN = fastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT); + if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. + return false; + IdxNIsKill = true; + } + N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); + if (!N) // Unhandled operand. Halt "fast" selection and bail. + return false; + } + } + if (TotalOffs) { + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. + return false; + } + + // We successfully emitted code for the given LLVM Instruction. + updateValueMap(I, N); + return true; +} + +bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, + const CallInst *CI, unsigned StartIdx) { + for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) { + Value *Val = CI->getArgOperand(i); + // Check for constants and encode them with a StackMaps::ConstantOp prefix. + if (const auto *C = dyn_cast<ConstantInt>(Val)) { + Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp)); + Ops.push_back(MachineOperand::CreateImm(C->getSExtValue())); + } else if (isa<ConstantPointerNull>(Val)) { + Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp)); + Ops.push_back(MachineOperand::CreateImm(0)); + } else if (auto *AI = dyn_cast<AllocaInst>(Val)) { + // Values coming from a stack location also require a special encoding, + // but that is added later on by the target specific frame index + // elimination implementation. 
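+      // E.g. (illustrative) a static alloca becomes a frame-index operand
+      // here and is only rewritten to a concrete SP/FP offset when frame
+      // indices are eliminated during prologue/epilogue insertion.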
+ auto SI = FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + Ops.push_back(MachineOperand::CreateFI(SI->second)); + else + return false; + } else { + unsigned Reg = getRegForValue(Val); + if (!Reg) + return false; + Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false)); + } + } + return true; +} + +bool FastISel::selectStackmap(const CallInst *I) { + // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>, + // [live variables...]) + assert(I->getCalledFunction()->getReturnType()->isVoidTy() && + "Stackmap cannot return a value."); + + // The stackmap intrinsic only records the live variables (the arguments + // passed to it) and emits NOPS (if requested). Unlike the patchpoint + // intrinsic, this won't be lowered to a function call. This means we don't + // have to worry about calling conventions and target-specific lowering code. + // Instead we perform the call lowering right here. + // + // CALLSEQ_START(0, 0...) + // STACKMAP(id, nbytes, ...) + // CALLSEQ_END(0, 0) + // + SmallVector<MachineOperand, 32> Ops; + + // Add the <id> and <numBytes> constants. + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) && + "Expected a constant integer."); + const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)); + Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue())); + + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) && + "Expected a constant integer."); + const auto *NumBytes = + cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); + + // Push live variables for the stack map (skipping the first two arguments + // <id> and <numBytes>). + if (!addStackMapLiveVars(Ops, I, 2)) + return false; + + // We are not adding any register mask info here, because the stackmap doesn't + // clobber anything. + + // Add scratch registers as implicit def and early clobber. + CallingConv::ID CC = I->getCallingConv(); + const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); + for (unsigned i = 0; ScratchRegs[i]; ++i) + Ops.push_back(MachineOperand::CreateReg( + ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false, + /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true)); + + // Issue CALLSEQ_START + unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); + auto Builder = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)); + const MCInstrDesc &MCID = Builder.getInstr()->getDesc(); + for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I) + Builder.addImm(0); + + // Issue STACKMAP. + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::STACKMAP)); + for (auto const &MO : Ops) + MIB.add(MO); + + // Issue CALLSEQ_END + unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) + .addImm(0) + .addImm(0); + + // Inform the Frame Information that we have a stackmap in this function. + FuncInfo.MF->getFrameInfo().setHasStackMap(); + + return true; +} + +/// Lower an argument list according to the target calling convention. +/// +/// This is a helper for lowering intrinsics that follow a target calling +/// convention or require stack pointer adjustment. Only a subset of the +/// intrinsic's operands need to participate in the calling convention. 
+bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx, + unsigned NumArgs, const Value *Callee, + bool ForceRetVoidTy, CallLoweringInfo &CLI) { + ArgListTy Args; + Args.reserve(NumArgs); + + // Populate the argument list. + ImmutableCallSite CS(CI); + for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) { + Value *V = CI->getOperand(ArgI); + + assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); + + ArgListEntry Entry; + Entry.Val = V; + Entry.Ty = V->getType(); + Entry.setAttributes(&CS, ArgI); + Args.push_back(Entry); + } + + Type *RetTy = ForceRetVoidTy ? Type::getVoidTy(CI->getType()->getContext()) + : CI->getType(); + CLI.setCallee(CI->getCallingConv(), RetTy, Callee, std::move(Args), NumArgs); + + return lowerCallTo(CLI); +} + +FastISel::CallLoweringInfo &FastISel::CallLoweringInfo::setCallee( + const DataLayout &DL, MCContext &Ctx, CallingConv::ID CC, Type *ResultTy, + StringRef Target, ArgListTy &&ArgsList, unsigned FixedArgs) { + SmallString<32> MangledName; + Mangler::getNameWithPrefix(MangledName, Target, DL); + MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName); + return setCallee(CC, ResultTy, Sym, std::move(ArgsList), FixedArgs); +} + +bool FastISel::selectPatchpoint(const CallInst *I) { + // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, + // i32 <numBytes>, + // i8* <target>, + // i32 <numArgs>, + // [Args...], + // [live variables...]) + CallingConv::ID CC = I->getCallingConv(); + bool IsAnyRegCC = CC == CallingConv::AnyReg; + bool HasDef = !I->getType()->isVoidTy(); + Value *Callee = I->getOperand(PatchPointOpers::TargetPos)->stripPointerCasts(); + + // Get the real number of arguments participating in the call <numArgs> + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) && + "Expected a constant integer."); + const auto *NumArgsVal = + cast<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)); + unsigned NumArgs = NumArgsVal->getZExtValue(); + + // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> + // This includes all meta-operands up to but not including CC. + unsigned NumMetaOpers = PatchPointOpers::CCPos; + assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs && + "Not enough arguments provided to the patchpoint intrinsic"); + + // For AnyRegCC the arguments are lowered later on manually. + unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; + CallLoweringInfo CLI; + CLI.setIsPatchPoint(); + if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI)) + return false; + + assert(CLI.Call && "No call instruction specified."); + + SmallVector<MachineOperand, 32> Ops; + + // Add an explicit result reg if we use the anyreg calling convention. + if (IsAnyRegCC && HasDef) { + assert(CLI.NumResultRegs == 0 && "Unexpected result register."); + CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64)); + CLI.NumResultRegs = 1; + Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*isDef=*/true)); + } + + // Add the <id> and <numBytes> constants. 
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) && + "Expected a constant integer."); + const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)); + Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue())); + + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) && + "Expected a constant integer."); + const auto *NumBytes = + cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); + + // Add the call target. + if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) { + uint64_t CalleeConstAddr = + cast<ConstantInt>(C->getOperand(0))->getZExtValue(); + Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr)); + } else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) { + if (C->getOpcode() == Instruction::IntToPtr) { + uint64_t CalleeConstAddr = + cast<ConstantInt>(C->getOperand(0))->getZExtValue(); + Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr)); + } else + llvm_unreachable("Unsupported ConstantExpr."); + } else if (const auto *GV = dyn_cast<GlobalValue>(Callee)) { + Ops.push_back(MachineOperand::CreateGA(GV, 0)); + } else if (isa<ConstantPointerNull>(Callee)) + Ops.push_back(MachineOperand::CreateImm(0)); + else + llvm_unreachable("Unsupported callee address."); + + // Adjust <numArgs> to account for any arguments that have been passed on + // the stack instead. + unsigned NumCallRegArgs = IsAnyRegCC ? NumArgs : CLI.OutRegs.size(); + Ops.push_back(MachineOperand::CreateImm(NumCallRegArgs)); + + // Add the calling convention + Ops.push_back(MachineOperand::CreateImm((unsigned)CC)); + + // Add the arguments we omitted previously. The register allocator should + // place these in any free register. + if (IsAnyRegCC) { + for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) { + unsigned Reg = getRegForValue(I->getArgOperand(i)); + if (!Reg) + return false; + Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false)); + } + } + + // Push the arguments from the call instruction. + for (auto Reg : CLI.OutRegs) + Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false)); + + // Push live variables for the stack map. + if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs)) + return false; + + // Push the register mask info. + Ops.push_back(MachineOperand::CreateRegMask( + TRI.getCallPreservedMask(*FuncInfo.MF, CC))); + + // Add scratch registers as implicit def and early clobber. + const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); + for (unsigned i = 0; ScratchRegs[i]; ++i) + Ops.push_back(MachineOperand::CreateReg( + ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false, + /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true)); + + // Add implicit defs (return values). + for (auto Reg : CLI.InRegs) + Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/true, + /*isImp=*/true)); + + // Insert the patchpoint instruction before the call generated by the target. + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc, + TII.get(TargetOpcode::PATCHPOINT)); + + for (auto &MO : Ops) + MIB.add(MO); + + MIB->setPhysRegsDeadExcept(CLI.InRegs, TRI); + + // Delete the original call instruction. + CLI.Call->eraseFromParent(); + + // Inform the Frame Information that we have a patchpoint in this function. 
+  FuncInfo.MF->getFrameInfo().setHasPatchPoint();
+
+  if (CLI.NumResultRegs)
+    updateValueMap(I, CLI.ResultReg, CLI.NumResultRegs);
+  return true;
+}
+
+bool FastISel::selectXRayCustomEvent(const CallInst *I) {
+  const auto &Triple = TM.getTargetTriple();
+  if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+    return true; // don't do anything to this instruction.
+  SmallVector<MachineOperand, 8> Ops;
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
+                                          /*isDef=*/false));
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
+                                          /*isDef=*/false));
+  MachineInstrBuilder MIB =
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
+  for (auto &MO : Ops)
+    MIB.add(MO);
+
+  // The PATCHABLE_EVENT_CALL instruction inserted here is lowered properly
+  // later.
+  return true;
+}
+
+bool FastISel::selectXRayTypedEvent(const CallInst *I) {
+  const auto &Triple = TM.getTargetTriple();
+  if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+    return true; // don't do anything to this instruction.
+  SmallVector<MachineOperand, 8> Ops;
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
+                                          /*isDef=*/false));
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
+                                          /*isDef=*/false));
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
+                                          /*isDef=*/false));
+  MachineInstrBuilder MIB =
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
+  for (auto &MO : Ops)
+    MIB.add(MO);
+
+  // The PATCHABLE_TYPED_EVENT_CALL instruction inserted here is lowered
+  // properly later.
+  return true;
+}
+
+/// Returns an AttributeList representing the attributes applied to the return
+/// value of the given call.
+static AttributeList getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
+  SmallVector<Attribute::AttrKind, 2> Attrs;
+  if (CLI.RetSExt)
+    Attrs.push_back(Attribute::SExt);
+  if (CLI.RetZExt)
+    Attrs.push_back(Attribute::ZExt);
+  if (CLI.IsInReg)
+    Attrs.push_back(Attribute::InReg);
+
+  return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
+                            Attrs);
+}
+
+bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName,
+                           unsigned NumArgs) {
+  MCContext &Ctx = MF->getContext();
+  SmallString<32> MangledName;
+  Mangler::getNameWithPrefix(MangledName, SymName, DL);
+  MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName);
+  return lowerCallTo(CI, Sym, NumArgs);
+}
+
+bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol,
+                           unsigned NumArgs) {
+  ImmutableCallSite CS(CI);
+
+  FunctionType *FTy = CS.getFunctionType();
+  Type *RetTy = CS.getType();
+
+  ArgListTy Args;
+  Args.reserve(NumArgs);
+
+  // Populate the argument list.
+  // Attributes for args start at offset 1, after the return attribute.
+  for (unsigned ArgI = 0; ArgI != NumArgs; ++ArgI) {
+    Value *V = CI->getOperand(ArgI);
+
+    assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+    ArgListEntry Entry;
+    Entry.Val = V;
+    Entry.Ty = V->getType();
+    Entry.setAttributes(&CS, ArgI);
+    Args.push_back(Entry);
+  }
+  TLI.markLibCallAttributes(MF, CS.getCallingConv(), Args);
+
+  CallLoweringInfo CLI;
+  CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), CS, NumArgs);
+
+  return lowerCallTo(CLI);
+}
+
+bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
+  // Handle the incoming return values from the call.
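+  // For example (illustrative): an i64 return value on a 32-bit target is
+  // expanded below into two i32 InputArgs, one per register holding a half
+  // of the value.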
+ CLI.clearIns(); + SmallVector<EVT, 4> RetTys; + ComputeValueVTs(TLI, DL, CLI.RetTy, RetTys); + + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL); + + bool CanLowerReturn = TLI.CanLowerReturn( + CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext()); + + // FIXME: sret demotion isn't supported yet - bail out. + if (!CanLowerReturn) + return false; + + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = TLI.getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } + } + + // Handle all of the outgoing arguments. + CLI.clearOuts(); + for (auto &Arg : CLI.getArgs()) { + Type *FinalType = Arg.Ty; + if (Arg.IsByVal) + FinalType = cast<PointerType>(Arg.Ty)->getElementType(); + bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( + FinalType, CLI.CallConv, CLI.IsVarArg); + + ISD::ArgFlagsTy Flags; + if (Arg.IsZExt) + Flags.setZExt(); + if (Arg.IsSExt) + Flags.setSExt(); + if (Arg.IsInReg) + Flags.setInReg(); + if (Arg.IsSRet) + Flags.setSRet(); + if (Arg.IsSwiftSelf) + Flags.setSwiftSelf(); + if (Arg.IsSwiftError) + Flags.setSwiftError(); + if (Arg.IsByVal) + Flags.setByVal(); + if (Arg.IsInAlloca) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling in + // the various CC lowering callbacks. + Flags.setByVal(); + } + if (Arg.IsByVal || Arg.IsInAlloca) { + PointerType *Ty = cast<PointerType>(Arg.Ty); + Type *ElementTy = Ty->getElementType(); + unsigned FrameSize = + DL.getTypeAllocSize(Arg.ByValType ? Arg.ByValType : ElementTy); + + // For ByVal, alignment should come from FE. BE will guess if this info + // is not there, but there are cases it cannot get right. + unsigned FrameAlign = Arg.Alignment; + if (!FrameAlign) + FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL); + Flags.setByValSize(FrameSize); + Flags.setByValAlign(Align(FrameAlign)); + } + if (Arg.IsNest) + Flags.setNest(); + if (NeedsRegBlock) + Flags.setInConsecutiveRegs(); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty))); + + CLI.OutVals.push_back(Arg.Val); + CLI.OutFlags.push_back(Flags); + } + + if (!fastLowerCall(CLI)) + return false; + + // Set all unused physreg defs as dead. + assert(CLI.Call && "No call instruction specified."); + CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI); + + if (CLI.NumResultRegs && CLI.CS) + updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); + + // Set labels for heapallocsite call. 
+  if (CLI.CS && CLI.CS->getInstruction()->hasMetadata("heapallocsite")) {
+    const MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite");
+    MF->addCodeViewHeapAllocSite(CLI.Call, MD);
+  }
+
+  return true;
+}
+
+bool FastISel::lowerCall(const CallInst *CI) {
+  ImmutableCallSite CS(CI);
+
+  FunctionType *FuncTy = CS.getFunctionType();
+  Type *RetTy = CS.getType();
+
+  ArgListTy Args;
+  ArgListEntry Entry;
+  Args.reserve(CS.arg_size());
+
+  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+       i != e; ++i) {
+    Value *V = *i;
+
+    // Skip empty types
+    if (V->getType()->isEmptyTy())
+      continue;
+
+    Entry.Val = V;
+    Entry.Ty = V->getType();
+
+    // Skip the first return-type Attribute to get to params.
+    Entry.setAttributes(&CS, i - CS.arg_begin());
+    Args.push_back(Entry);
+  }
+
+  // Check if target-independent constraints permit a tail call here.
+  // Target-dependent constraints are checked within fastLowerCall.
+  bool IsTailCall = CI->isTailCall();
+  if (IsTailCall && !isInTailCallPosition(CS, TM))
+    IsTailCall = false;
+
+  CallLoweringInfo CLI;
+  CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS)
+      .setTailCall(IsTailCall);
+
+  return lowerCallTo(CLI);
+}
+
+bool FastISel::selectCall(const User *I) {
+  const CallInst *Call = cast<CallInst>(I);
+
+  // Handle simple inline asms.
+  if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) {
+    // If the inline asm has side effects, then make sure that no local value
+    // lives across it by flushing the local value map.
+    if (IA->hasSideEffects())
+      flushLocalValueMap();
+
+    // Don't attempt to handle constraints.
+    if (!IA->getConstraintString().empty())
+      return false;
+
+    unsigned ExtraInfo = 0;
+    if (IA->hasSideEffects())
+      ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+    if (IA->isAlignStack())
+      ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+    ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::INLINEASM))
+        .addExternalSymbol(IA->getAsmString().c_str())
+        .addImm(ExtraInfo);
+    return true;
+  }
+
+  // Handle intrinsic function calls.
+  if (const auto *II = dyn_cast<IntrinsicInst>(Call))
+    return selectIntrinsicCall(II);
+
+  // Usually, it does not make sense to initialize a value,
+  // make an unrelated function call and use the value, because
+  // it tends to be spilled on the stack. So, we move the pointer
+  // to the last local value to the beginning of the block, so that
+  // all the values which have already been materialized
+  // appear after the call. It also makes sense to skip intrinsics
+  // since they tend to be inlined.
+  flushLocalValueMap();
+
+  return lowerCall(Call);
+}
+
+bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
+  switch (II->getIntrinsicID()) {
+  default:
+    break;
+  // At -O0 we don't care about the lifetime intrinsics.
+  case Intrinsic::lifetime_start:
+  case Intrinsic::lifetime_end:
+  // The donothing intrinsic does, well, nothing.
+  case Intrinsic::donothing:
+  // Neither does the sideeffect intrinsic.
+  case Intrinsic::sideeffect:
+  // Neither does the assume intrinsic; it's also OK not to codegen its operand.
+ case Intrinsic::assume: + return true; + case Intrinsic::dbg_declare: { + const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); + assert(DI->getVariable() && "Missing variable"); + if (!FuncInfo.MF->getMMI().hasDebugInfo()) { + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + return true; + } + + const Value *Address = DI->getAddress(); + if (!Address || isa<UndefValue>(Address)) { + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + return true; + } + + // Byval arguments with frame indices were already handled after argument + // lowering and before isel. + const auto *Arg = + dyn_cast<Argument>(Address->stripInBoundsConstantOffsets()); + if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX) + return true; + + Optional<MachineOperand> Op; + if (unsigned Reg = lookUpRegForValue(Address)) + Op = MachineOperand::CreateReg(Reg, false); + + // If we have a VLA that has a "use" in a metadata node that's then used + // here but it has no other uses, then we have a problem. E.g., + // + // int foo (const int *x) { + // char a[*x]; + // return 0; + // } + // + // If we assign 'a' a vreg and fast isel later on has to use the selection + // DAG isel, it will want to copy the value to the vreg. However, there are + // no uses, which goes counter to what selection DAG isel expects. + if (!Op && !Address->use_empty() && isa<Instruction>(Address) && + (!isa<AllocaInst>(Address) || + !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) + Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address), + false); + + if (Op) { + assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); + // A dbg.declare describes the address of a source variable, so lower it + // into an indirect DBG_VALUE. + auto *Expr = DI->getExpression(); + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ false, + *Op, DI->getVariable(), Expr); + } else { + // We can't yet handle anything else here because it would require + // generating code, thus altering codegen because of debug info. + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + } + return true; + } + case Intrinsic::dbg_value: { + // This form of DBG_VALUE is target-independent. + const DbgValueInst *DI = cast<DbgValueInst>(II); + const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + const Value *V = DI->getValue(); + assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); + if (!V) { + // Currently the optimizer can produce this; insert an undef to + // help debugging. Probably the optimizer should not do this. 
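+      // (Illustrative) this can happen when the optimizer deletes the value
+      // a dbg.value refers to; the undef DBG_VALUE keeps the variable shown
+      // as "optimized out" instead of a stale location.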
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U, + DI->getVariable(), DI->getExpression()); + } else if (const auto *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getBitWidth() > 64) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addCImm(CI) + .addReg(0U) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addImm(CI->getZExtValue()) + .addReg(0U) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); + } else if (const auto *CF = dyn_cast<ConstantFP>(V)) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addFPImm(CF) + .addReg(0U) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); + } else if (unsigned Reg = lookUpRegForValue(V)) { + // FIXME: This does not handle register-indirect values at offset 0. + bool IsIndirect = false; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, + DI->getVariable(), DI->getExpression()); + } else { + // We can't yet handle anything else here because it would require + // generating code, thus altering codegen because of debug info. + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + } + return true; + } + case Intrinsic::dbg_label: { + const DbgLabelInst *DI = cast<DbgLabelInst>(II); + assert(DI->getLabel() && "Missing label"); + if (!FuncInfo.MF->getMMI().hasDebugInfo()) { + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + return true; + } + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel()); + return true; + } + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); + + case Intrinsic::is_constant: + llvm_unreachable("llvm.is.constant.* should have been lowered already"); + + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + case Intrinsic::expect: { + unsigned ResultReg = getRegForValue(II->getArgOperand(0)); + if (!ResultReg) + return false; + updateValueMap(II, ResultReg); + return true; + } + case Intrinsic::experimental_stackmap: + return selectStackmap(II); + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + return selectPatchpoint(II); + + case Intrinsic::xray_customevent: + return selectXRayCustomEvent(II); + case Intrinsic::xray_typedevent: + return selectXRayTypedEvent(II); + } + + return fastLowerIntrinsicCall(II); +} + +bool FastISel::selectCast(const User *I, unsigned Opcode) { + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); + + if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || + !DstVT.isSimple()) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // Check if the destination type is legal. + if (!TLI.isTypeLegal(DstVT)) + return false; + + // Check if the source operand is legal. + if (!TLI.isTypeLegal(SrcVT)) + return false; + + unsigned InputReg = getRegForValue(I->getOperand(0)); + if (!InputReg) + // Unhandled operand. Halt "fast" selection and bail. 
+ return false; + + bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); + + unsigned ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), + Opcode, InputReg, InputRegIsKill); + if (!ResultReg) + return false; + + updateValueMap(I, ResultReg); + return true; +} + +bool FastISel::selectBitCast(const User *I) { + // If the bitcast doesn't change the type, just use the operand value. + if (I->getType() == I->getOperand(0)->getType()) { + unsigned Reg = getRegForValue(I->getOperand(0)); + if (!Reg) + return false; + updateValueMap(I, Reg); + return true; + } + + // Bitcasts of other values become reg-reg copies or BITCAST operators. + EVT SrcEVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstEVT = TLI.getValueType(DL, I->getType()); + if (SrcEVT == MVT::Other || DstEVT == MVT::Other || + !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT)) + // Unhandled type. Halt "fast" selection and bail. + return false; + + MVT SrcVT = SrcEVT.getSimpleVT(); + MVT DstVT = DstEVT.getSimpleVT(); + unsigned Op0 = getRegForValue(I->getOperand(0)); + if (!Op0) // Unhandled operand. Halt "fast" selection and bail. + return false; + bool Op0IsKill = hasTrivialKill(I->getOperand(0)); + + // First, try to perform the bitcast by inserting a reg-reg copy. + unsigned ResultReg = 0; + if (SrcVT == DstVT) { + const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT); + const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT); + // Don't attempt a cross-class copy. It will likely fail. + if (SrcClass == DstClass) { + ResultReg = createResultReg(DstClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0); + } + } + + // If the reg-reg copy failed, select a BITCAST opcode. + if (!ResultReg) + ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill); + + if (!ResultReg) + return false; + + updateValueMap(I, ResultReg); + return true; +} + +// Remove local value instructions starting from the instruction after +// SavedLastLocalValue to the current function insert point. +void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) +{ + MachineInstr *CurLastLocalValue = getLastLocalValue(); + if (CurLastLocalValue != SavedLastLocalValue) { + // Find the first local value instruction to be deleted. + // This is the instruction after SavedLastLocalValue if it is non-NULL. + // Otherwise it's the first instruction in the block. + MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue); + if (SavedLastLocalValue) + ++FirstDeadInst; + else + FirstDeadInst = FuncInfo.MBB->getFirstNonPHI(); + setLastLocalValue(SavedLastLocalValue); + removeDeadCode(FirstDeadInst, FuncInfo.InsertPt); + } +} + +bool FastISel::selectInstruction(const Instruction *I) { + MachineInstr *SavedLastLocalValue = getLastLocalValue(); + // Just before the terminator instruction, insert instructions to + // feed PHI nodes in successor blocks. + if (I->isTerminator()) { + if (!handlePHINodesInSuccessorBlocks(I->getParent())) { + // PHI node handling may have generated local value instructions, + // even though it failed to handle all PHI nodes. + // We remove these instructions because SelectionDAGISel will generate + // them again. + removeDeadLocalValueCode(SavedLastLocalValue); + return false; + } + } + + // FastISel does not handle any operand bundles except OB_funclet. 
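+  // E.g. (illustrative) a call carrying a "deopt" operand bundle falls back
+  // to SelectionDAG here, while an EH "funclet" bundle is tolerated because
+  // it does not change how the call itself is selected.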
+  if (ImmutableCallSite CS = ImmutableCallSite(I))
+    for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i)
+      if (CS.getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet)
+        return false;
+
+  DbgLoc = I->getDebugLoc();
+
+  SavedInsertPt = FuncInfo.InsertPt;
+
+  if (const auto *Call = dyn_cast<CallInst>(I)) {
+    const Function *F = Call->getCalledFunction();
+    LibFunc Func;
+
+    // As a special case, don't handle calls to builtin library functions that
+    // may be translated directly to target instructions.
+    if (F && !F->hasLocalLinkage() && F->hasName() &&
+        LibInfo->getLibFunc(F->getName(), Func) &&
+        LibInfo->hasOptimizedCodeGen(Func))
+      return false;
+
+    // Don't handle Intrinsic::trap if a trap function is specified.
+    if (F && F->getIntrinsicID() == Intrinsic::trap &&
+        Call->hasFnAttr("trap-func-name"))
+      return false;
+  }
+
+  // First, try doing target-independent selection.
+  if (!SkipTargetIndependentISel) {
+    if (selectOperator(I, I->getOpcode())) {
+      ++NumFastIselSuccessIndependent;
+      DbgLoc = DebugLoc();
+      return true;
+    }
+    // Remove dead code.
+    recomputeInsertPt();
+    if (SavedInsertPt != FuncInfo.InsertPt)
+      removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+    SavedInsertPt = FuncInfo.InsertPt;
+  }
+  // Next, try calling the target to attempt to handle the instruction.
+  if (fastSelectInstruction(I)) {
+    ++NumFastIselSuccessTarget;
+    DbgLoc = DebugLoc();
+    return true;
+  }
+  // Remove dead code.
+  recomputeInsertPt();
+  if (SavedInsertPt != FuncInfo.InsertPt)
+    removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+
+  DbgLoc = DebugLoc();
+  // Undo phi node updates, because they will be added again by SelectionDAG.
+  if (I->isTerminator()) {
+    // PHI node handling may have generated local value instructions.
+    // We remove them because SelectionDAGISel will generate them again.
+    removeDeadLocalValueCode(SavedLastLocalValue);
+    FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
+  }
+  return false;
+}
+
+/// Emit an unconditional branch to the given block, unless it is the immediate
+/// (fall-through) successor, and update the CFG.
+void FastISel::fastEmitBranch(MachineBasicBlock *MSucc,
+                              const DebugLoc &DbgLoc) {
+  if (FuncInfo.MBB->getBasicBlock()->sizeWithoutDebug() > 1 &&
+      FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
+    // Unconditional fall-through case: no branch instruction is needed. (If
+    // the branch were the only non-debug instruction in the block, we would
+    // emit it anyway, in the else path, for more accurate line information.)
+  } else {
+    // The unconditional branch case.
+    TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr,
+                     SmallVector<MachineOperand, 0>(), DbgLoc);
+  }
+  if (FuncInfo.BPI) {
+    auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
+        FuncInfo.MBB->getBasicBlock(), MSucc->getBasicBlock());
+    FuncInfo.MBB->addSuccessor(MSucc, BranchProbability);
+  } else
+    FuncInfo.MBB->addSuccessorWithoutProb(MSucc);
+}
+
+void FastISel::finishCondBranch(const BasicBlock *BranchBB,
+                                MachineBasicBlock *TrueMBB,
+                                MachineBasicBlock *FalseMBB) {
+  // Add TrueMBB as a successor unless it is equal to FalseMBB: this can
+  // happen in degenerate IR, and MachineIR forbids having a block appear
+  // twice in the successor/predecessor lists.
+ if (TrueMBB != FalseMBB) { + if (FuncInfo.BPI) { + auto BranchProbability = + FuncInfo.BPI->getEdgeProbability(BranchBB, TrueMBB->getBasicBlock()); + FuncInfo.MBB->addSuccessor(TrueMBB, BranchProbability); + } else + FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB); + } + + fastEmitBranch(FalseMBB, DbgLoc); +} + +/// Emit an FNeg operation. +bool FastISel::selectFNeg(const User *I, const Value *In) { + unsigned OpReg = getRegForValue(In); + if (!OpReg) + return false; + bool OpRegIsKill = hasTrivialKill(In); + + // If the target has ISD::FNEG, use it. + EVT VT = TLI.getValueType(DL, I->getType()); + unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG, + OpReg, OpRegIsKill); + if (ResultReg) { + updateValueMap(I, ResultReg); + return true; + } + + // Bitcast the value to integer, twiddle the sign bit with xor, + // and then bitcast it back to floating-point. + if (VT.getSizeInBits() > 64) + return false; + EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits()); + if (!TLI.isTypeLegal(IntVT)) + return false; + + unsigned IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), + ISD::BITCAST, OpReg, OpRegIsKill); + if (!IntReg) + return false; + + unsigned IntResultReg = fastEmit_ri_( + IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true, + UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT()); + if (!IntResultReg) + return false; + + ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST, + IntResultReg, /*IsKill=*/true); + if (!ResultReg) + return false; + + updateValueMap(I, ResultReg); + return true; +} + +bool FastISel::selectExtractValue(const User *U) { + const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U); + if (!EVI) + return false; + + // Make sure we only try to handle extracts with a legal result. But also + // allow i1 because it's easy. + EVT RealVT = TLI.getValueType(DL, EVI->getType(), /*AllowUnknown=*/true); + if (!RealVT.isSimple()) + return false; + MVT VT = RealVT.getSimpleVT(); + if (!TLI.isTypeLegal(VT) && VT != MVT::i1) + return false; + + const Value *Op0 = EVI->getOperand(0); + Type *AggTy = Op0->getType(); + + // Get the base result register. + unsigned ResultReg; + DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0); + if (I != FuncInfo.ValueMap.end()) + ResultReg = I->second; + else if (isa<Instruction>(Op0)) + ResultReg = FuncInfo.InitializeRegForValue(Op0); + else + return false; // fast-isel can't handle aggregate constants at the moment + + // Get the actual result register, which is an offset from the base register. + unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices()); + + SmallVector<EVT, 4> AggValueVTs; + ComputeValueVTs(TLI, DL, AggTy, AggValueVTs); + + for (unsigned i = 0; i < VTIndex; i++) + ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]); + + updateValueMap(EVI, ResultReg); + return true; +} + +bool FastISel::selectOperator(const User *I, unsigned Opcode) { + switch (Opcode) { + case Instruction::Add: + return selectBinaryOp(I, ISD::ADD); + case Instruction::FAdd: + return selectBinaryOp(I, ISD::FADD); + case Instruction::Sub: + return selectBinaryOp(I, ISD::SUB); + case Instruction::FSub: { + // FNeg is currently represented in LLVM IR as a special case of FSub. 
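+ // For example, '%neg = fsub float -0.000000e+00, %x' is matched as an
+ // FNeg of %x rather than selected as a regular subtraction.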
+ Value *X;
+ if (match(I, m_FNeg(m_Value(X))))
+ return selectFNeg(I, X);
+ return selectBinaryOp(I, ISD::FSUB);
+ }
+ case Instruction::Mul:
+ return selectBinaryOp(I, ISD::MUL);
+ case Instruction::FMul:
+ return selectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return selectBinaryOp(I, ISD::SDIV);
+ case Instruction::UDiv:
+ return selectBinaryOp(I, ISD::UDIV);
+ case Instruction::FDiv:
+ return selectBinaryOp(I, ISD::FDIV);
+ case Instruction::SRem:
+ return selectBinaryOp(I, ISD::SREM);
+ case Instruction::URem:
+ return selectBinaryOp(I, ISD::UREM);
+ case Instruction::FRem:
+ return selectBinaryOp(I, ISD::FREM);
+ case Instruction::Shl:
+ return selectBinaryOp(I, ISD::SHL);
+ case Instruction::LShr:
+ return selectBinaryOp(I, ISD::SRL);
+ case Instruction::AShr:
+ return selectBinaryOp(I, ISD::SRA);
+ case Instruction::And:
+ return selectBinaryOp(I, ISD::AND);
+ case Instruction::Or:
+ return selectBinaryOp(I, ISD::OR);
+ case Instruction::Xor:
+ return selectBinaryOp(I, ISD::XOR);
+
+ case Instruction::FNeg:
+ return selectFNeg(I, I->getOperand(0));
+
+ case Instruction::GetElementPtr:
+ return selectGetElementPtr(I);
+
+ case Instruction::Br: {
+ const BranchInst *BI = cast<BranchInst>(I);
+
+ if (BI->isUnconditional()) {
+ const BasicBlock *LLVMSucc = BI->getSuccessor(0);
+ MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
+ fastEmitBranch(MSucc, BI->getDebugLoc());
+ return true;
+ }
+
+ // Conditional branches are not handled yet.
+ // Halt "fast" selection and bail.
+ return false;
+ }
+
+ case Instruction::Unreachable:
+ if (TM.Options.TrapUnreachable)
+ return fastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0;
+ else
+ return true;
+
+ case Instruction::Alloca:
+ // FunctionLowering has the static-sized case covered.
+ if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
+ return true;
+
+ // Dynamic-sized alloca is not handled yet.
+ return false;
+
+ case Instruction::Call:
+ // On AIX, call lowering uses the DAG-ISEL path currently so that the
+ // callee of the direct function call instruction will be mapped to the
+ // symbol for the function's entry point, which is distinct from the
+ // function descriptor symbol. The latter is the symbol whose XCOFF symbol
+ // name is the C-linkage name of the source level function.
+ if (TM.getTargetTriple().isOSAIX())
+ return false;
+ return selectCall(I);
+
+ case Instruction::BitCast:
+ return selectBitCast(I);
+
+ case Instruction::FPToSI:
+ return selectCast(I, ISD::FP_TO_SINT);
+ case Instruction::ZExt:
+ return selectCast(I, ISD::ZERO_EXTEND);
+ case Instruction::SExt:
+ return selectCast(I, ISD::SIGN_EXTEND);
+ case Instruction::Trunc:
+ return selectCast(I, ISD::TRUNCATE);
+ case Instruction::SIToFP:
+ return selectCast(I, ISD::SINT_TO_FP);
+
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return selectCast(I, ISD::ZERO_EXTEND);
+ if (DstVT.bitsLT(SrcVT))
+ return selectCast(I, ISD::TRUNCATE);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (!Reg)
+ return false;
+ updateValueMap(I, Reg);
+ return true;
+ }
+
+ case Instruction::ExtractValue:
+ return selectExtractValue(I);
+
+ case Instruction::PHI:
+ llvm_unreachable("FastISel shouldn't visit PHI nodes!");
+
+ default:
+ // Unhandled instruction. Halt "fast" selection and bail.
+ return false;
+ }
+}
+
+FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo,
+ bool SkipTargetIndependentISel)
+ : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()),
+ MFI(FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
+ TM(FuncInfo.MF->getTarget()), DL(MF->getDataLayout()),
+ TII(*MF->getSubtarget().getInstrInfo()),
+ TLI(*MF->getSubtarget().getTargetLowering()),
+ TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
+ SkipTargetIndependentISel(SkipTargetIndependentISel) {}
+
+FastISel::~FastISel() = default;
+
+bool FastISel::fastLowerArguments() { return false; }
+
+bool FastISel::fastLowerCall(CallLoweringInfo & /*CLI*/) { return false; }
+
+bool FastISel::fastLowerIntrinsicCall(const IntrinsicInst * /*II*/) {
+ return false;
+}
+
+unsigned FastISel::fastEmit_(MVT, MVT, unsigned) { return 0; }
+
+unsigned FastISel::fastEmit_r(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_rr(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/, unsigned /*Op1*/,
+ bool /*Op1IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_f(MVT, MVT, unsigned,
+ const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/, uint64_t /*Imm*/) {
+ return 0;
+}
+
+/// This method is a wrapper of fastEmit_ri. It first tries to emit an
+/// instruction with an immediate operand using fastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// fastEmit_rr instead.
+unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
+ bool Op0IsKill, uint64_t Imm, MVT ImmType) {
+ // If this is a multiply by a power of two, emit this as a shift left.
+ if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
+ Opcode = ISD::SHL;
+ Imm = Log2_64(Imm);
+ } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
+ // div x, 8 -> srl x, 3
+ Opcode = ISD::SRL;
+ Imm = Log2_64(Imm);
+ }
+
+ // Horrible hack (to be removed), check to make sure shift amounts are
+ // in-range.
+ if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
+ Imm >= VT.getSizeInBits())
+ return 0;
+
+ // First check if immediate type is legal. If not, we can't use the ri form.
+ unsigned ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+ if (ResultReg)
+ return ResultReg;
+ unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ bool IsImmKill = true;
+ if (!MaterialReg) {
+ // This is a bit ugly/slow, but failing here means falling out of
+ // fast-isel, which would be very slow.
+ IntegerType *ITy =
+ IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits());
+ MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
+ if (!MaterialReg)
+ return 0;
+ // FIXME: If the materialized register here has no uses yet then this
+ // will be the first use and we should be able to mark it as killed.
+ // However, the local value area for materialising constant expressions
+ // grows down, not up, which means that any constant expressions we generate
+ // later which also use 'Imm' could be after this instruction and therefore
+ // after this kill.
+ IsImmKill = false; + } + return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, IsImmKill); +} + +unsigned FastISel::createResultReg(const TargetRegisterClass *RC) { + return MRI.createVirtualRegister(RC); +} + +unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, + unsigned OpNum) { + if (Register::isVirtualRegister(Op)) { + const TargetRegisterClass *RegClass = + TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); + if (!MRI.constrainRegClass(Op, RegClass)) { + // If it's not legal to COPY between the register classes, something + // has gone very wrong before we got here. + unsigned NewOp = createResultReg(RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), NewOp).addReg(Op); + return NewOp; + } + } + return Op; +} + +unsigned FastISel::fastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass *RC) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg); + return ResultReg; +} + +unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill) { + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(Op0, getKillRegState(Op0IsKill)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + + return ResultReg; +} + +unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, unsigned Op1, + bool Op1IsKill) { + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, unsigned Op1, + bool Op1IsKill, unsigned Op2, + bool Op2IsKill) { + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addReg(Op2, getKillRegState(Op2IsKill)); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addReg(Op2, 
getKillRegState(Op2IsKill)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, uint64_t Imm) { + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, uint64_t Imm1, + uint64_t Imm2) { + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm1) + .addImm(Imm2); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm1) + .addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + const ConstantFP *FPImm) { + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + unsigned ResultReg = createResultReg(RC); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addFPImm(FPImm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addFPImm(FPImm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, unsigned Op1, + bool Op1IsKill, uint64_t Imm) { + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addImm(Imm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addImm(Imm); + else { + 
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, + bool Op0IsKill, uint32_t Idx) { + unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + assert(Register::isVirtualRegister(Op0) && + "Cannot yet extract from physregs"); + const TargetRegisterClass *RC = MRI.getRegClass(Op0); + MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Op0, getKillRegState(Op0IsKill), Idx); + return ResultReg; +} + +/// Emit MachineInstrs to compute the value of Op with all but the least +/// significant bit set to zero. +unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { + return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); +} + +/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. +/// Emit code to ensure constants are copied into registers when needed. +/// Remember the virtual registers that need to be added to the Machine PHI +/// nodes as input. We cannot just directly add them, because expansion +/// might result in multiple MBB's for one BB. As such, the start of the +/// BB might correspond to a different MBB than the end. +bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { + const Instruction *TI = LLVMBB->getTerminator(); + + SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; + FuncInfo.OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size(); + + // Check successor nodes' PHI nodes that expect a constant to be available + // from this block. + for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { + const BasicBlock *SuccBB = TI->getSuccessor(succ); + if (!isa<PHINode>(SuccBB->begin())) + continue; + MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; + + // If this terminator has multiple identical successors (common for + // switches), only handle each succ once. + if (!SuccsHandled.insert(SuccMBB).second) + continue; + + MachineBasicBlock::iterator MBBI = SuccMBB->begin(); + + // At this point we know that there is a 1-1 correspondence between LLVM PHI + // nodes and Machine PHI nodes, but the incoming operands have not been + // emitted yet. + for (const PHINode &PN : SuccBB->phis()) { + // Ignore dead phi's. + if (PN.use_empty()) + continue; + + // Only handle legal types. Two interesting things to note here. First, + // by bailing out early, we may leave behind some dead instructions, + // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its + // own moves. Second, this check is necessary because FastISel doesn't + // use CreateRegs to create registers, so it always creates + // exactly one register for each non-void instruction. + EVT VT = TLI.getValueType(DL, PN.getType(), /*AllowUnknown=*/true); + if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { + // Handle integer promotions, though, because they're common and easy. + if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) { + FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); + return false; + } + } + + const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB); + + // Set the DebugLoc for the copy. Prefer the location of the operand + // if there is one; use the location of the PHI otherwise. 
+ DbgLoc = PN.getDebugLoc(); + if (const auto *Inst = dyn_cast<Instruction>(PHIOp)) + DbgLoc = Inst->getDebugLoc(); + + unsigned Reg = getRegForValue(PHIOp); + if (!Reg) { + FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); + return false; + } + FuncInfo.PHINodesToUpdate.push_back(std::make_pair(&*MBBI++, Reg)); + DbgLoc = DebugLoc(); + } + } + + return true; +} + +bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { + assert(LI->hasOneUse() && + "tryToFoldLoad expected a LoadInst with a single use"); + // We know that the load has a single use, but don't know what it is. If it + // isn't one of the folded instructions, then we can't succeed here. Handle + // this by scanning the single-use users of the load until we get to FoldInst. + unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. + + const Instruction *TheUser = LI->user_back(); + while (TheUser != FoldInst && // Scan up until we find FoldInst. + // Stay in the right block. + TheUser->getParent() == FoldInst->getParent() && + --MaxUsers) { // Don't scan too far. + // If there are multiple or no uses of this instruction, then bail out. + if (!TheUser->hasOneUse()) + return false; + + TheUser = TheUser->user_back(); + } + + // If we didn't find the fold instruction, then we failed to collapse the + // sequence. + if (TheUser != FoldInst) + return false; + + // Don't try to fold volatile loads. Target has to deal with alignment + // constraints. + if (LI->isVolatile()) + return false; + + // Figure out which vreg this is going into. If there is no assigned vreg yet + // then there actually was no reference to it. Perhaps the load is referenced + // by a dead instruction. + unsigned LoadReg = getRegForValue(LI); + if (!LoadReg) + return false; + + // We can't fold if this vreg has no uses or more than one use. Multiple uses + // may mean that the instruction got lowered to multiple MIs, or the use of + // the loaded value ended up being multiple operands of the result. + if (!MRI.hasOneUse(LoadReg)) + return false; + + MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg); + MachineInstr *User = RI->getParent(); + + // Set the insertion point properly. Folding the load can cause generation of + // other random instructions (like sign extends) for addressing modes; make + // sure they get inserted in a logical place before the new instruction. + FuncInfo.InsertPt = User; + FuncInfo.MBB = User->getParent(); + + // Ask the target to try folding the load. + return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI); +} + +bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) { + // Must be an add. + if (!isa<AddOperator>(Add)) + return false; + // Type size needs to match. + if (DL.getTypeSizeInBits(GEP->getType()) != + DL.getTypeSizeInBits(Add->getType())) + return false; + // Must be in the same basic block. + if (isa<Instruction>(Add) && + FuncInfo.MBBMap[cast<Instruction>(Add)->getParent()] != FuncInfo.MBB) + return false; + // Must have a constant operand. 
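+ // For example, 'add i64 %p, 16' can be folded into the GEP's addressing
+ // arithmetic, while 'add i64 %p, %q' cannot.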
+ return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1)); +} + +MachineMemOperand * +FastISel::createMachineMemOperandFor(const Instruction *I) const { + const Value *Ptr; + Type *ValTy; + unsigned Alignment; + MachineMemOperand::Flags Flags; + bool IsVolatile; + + if (const auto *LI = dyn_cast<LoadInst>(I)) { + Alignment = LI->getAlignment(); + IsVolatile = LI->isVolatile(); + Flags = MachineMemOperand::MOLoad; + Ptr = LI->getPointerOperand(); + ValTy = LI->getType(); + } else if (const auto *SI = dyn_cast<StoreInst>(I)) { + Alignment = SI->getAlignment(); + IsVolatile = SI->isVolatile(); + Flags = MachineMemOperand::MOStore; + Ptr = SI->getPointerOperand(); + ValTy = SI->getValueOperand()->getType(); + } else + return nullptr; + + bool IsNonTemporal = I->hasMetadata(LLVMContext::MD_nontemporal); + bool IsInvariant = I->hasMetadata(LLVMContext::MD_invariant_load); + bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable); + const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); + + AAMDNodes AAInfo; + I->getAAMetadata(AAInfo); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0. + Alignment = DL.getABITypeAlignment(ValTy); + + unsigned Size = DL.getTypeStoreSize(ValTy); + + if (IsVolatile) + Flags |= MachineMemOperand::MOVolatile; + if (IsNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; + if (IsDereferenceable) + Flags |= MachineMemOperand::MODereferenceable; + if (IsInvariant) + Flags |= MachineMemOperand::MOInvariant; + + return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size, + Alignment, AAInfo, Ranges); +} + +CmpInst::Predicate FastISel::optimizeCmpPredicate(const CmpInst *CI) const { + // If both operands are the same, then try to optimize or fold the cmp. + CmpInst::Predicate Predicate = CI->getPredicate(); + if (CI->getOperand(0) != CI->getOperand(1)) + return Predicate; + + switch (Predicate) { + default: llvm_unreachable("Invalid predicate!"); + case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break; + + case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break; + case 
CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break;
+ }
+
+ return Predicate;
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
new file mode 100644
index 0000000000000..cf6711adad48d
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -0,0 +1,546 @@
+//===-- FunctionLoweringInfo.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "function-lowering-info"
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
+/// PHI nodes or outside of the basic block that defines it, or used by a
+/// switch or atomic instruction, which may expand to multiple basic blocks.
+static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
+ if (I->use_empty()) return false;
+ if (isa<PHINode>(I)) return true;
+ const BasicBlock *BB = I->getParent();
+ for (const User *U : I->users())
+ if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U))
+ return true;
+
+ return false;
+}
+
+static ISD::NodeType getPreferredExtendForValue(const Value *V) {
+ // Among the users of the source value, count the compare instructions: if
+ // signed predicates outnumber unsigned ones, we prefer SIGN_EXTEND.
+ //
+ // With this optimization, we would be able to remove some redundant sign or
+ // zero extension instructions, and eventually expose more machine CSE
+ // opportunities.
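+ // For example, a value consumed only by 'icmp sgt' users is better
+ // sign-extended once than re-extended at every use.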
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + unsigned NumOfSigned = 0, NumOfUnsigned = 0; + for (const User *U : V->users()) { + if (const auto *CI = dyn_cast<CmpInst>(U)) { + NumOfSigned += CI->isSigned(); + NumOfUnsigned += CI->isUnsigned(); + } + } + if (NumOfSigned > NumOfUnsigned) + ExtendKind = ISD::SIGN_EXTEND; + + return ExtendKind; +} + +void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, + SelectionDAG *DAG) { + Fn = &fn; + MF = &mf; + TLI = MF->getSubtarget().getTargetLowering(); + RegInfo = &MF->getRegInfo(); + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + unsigned StackAlign = TFI->getStackAlignment(); + DA = DAG->getDivergenceAnalysis(); + + // Check whether the function can return without sret-demotion. + SmallVector<ISD::OutputArg, 4> Outs; + CallingConv::ID CC = Fn->getCallingConv(); + + GetReturnInfo(CC, Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI, + mf.getDataLayout()); + CanLowerReturn = + TLI->CanLowerReturn(CC, *MF, Fn->isVarArg(), Outs, Fn->getContext()); + + // If this personality uses funclets, we need to do a bit more work. + DenseMap<const AllocaInst *, TinyPtrVector<int *>> CatchObjects; + EHPersonality Personality = classifyEHPersonality( + Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr); + if (isFuncletEHPersonality(Personality)) { + // Calculate state numbers if we haven't already. + WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); + if (Personality == EHPersonality::MSVC_CXX) + calculateWinCXXEHStateNumbers(&fn, EHInfo); + else if (isAsynchronousEHPersonality(Personality)) + calculateSEHStateNumbers(&fn, EHInfo); + else if (Personality == EHPersonality::CoreCLR) + calculateClrEHStateNumbers(&fn, EHInfo); + + // Map all BB references in the WinEH data to MBBs. + for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { + for (WinEHHandlerType &H : TBME.HandlerArray) { + if (const AllocaInst *AI = H.CatchObj.Alloca) + CatchObjects.insert({AI, {}}).first->second.push_back( + &H.CatchObj.FrameIndex); + else + H.CatchObj.FrameIndex = INT_MAX; + } + } + } + if (Personality == EHPersonality::Wasm_CXX) { + WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); + calculateWasmEHInfo(&fn, EHInfo); + } + + // Initialize the mapping of values to registers. This is only set up for + // instruction values that are used outside of the block that defines + // them. + for (const BasicBlock &BB : *Fn) { + for (const Instruction &I : BB) { + if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { + Type *Ty = AI->getAllocatedType(); + unsigned Align = + std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), + AI->getAlignment()); + + // Static allocas can be folded into the initial stack frame + // adjustment. For targets that don't realign the stack, don't + // do this if there is an extra alignment requirement. + if (AI->isStaticAlloca() && + (TFI->isStackRealignable() || (Align <= StackAlign))) { + const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); + uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty); + + TySize *= CUI->getZExtValue(); // Get total allocated size. + if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. 
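+ // Catch objects referenced from the WinEH tables must live at frame
+ // offsets the runtime can recover, so targets that require it get a
+ // fixed object below; everything else becomes an ordinary stack object.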
+ int FrameIndex = INT_MAX; + auto Iter = CatchObjects.find(AI); + if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) { + FrameIndex = MF->getFrameInfo().CreateFixedObject( + TySize, 0, /*IsImmutable=*/false, /*isAliased=*/true); + MF->getFrameInfo().setObjectAlignment(FrameIndex, Align); + } else { + FrameIndex = + MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI); + } + + StaticAllocaMap[AI] = FrameIndex; + // Update the catch handler information. + if (Iter != CatchObjects.end()) { + for (int *CatchObjPtr : Iter->second) + *CatchObjPtr = FrameIndex; + } + } else { + // FIXME: Overaligned static allocas should be grouped into + // a single dynamic allocation instead of using a separate + // stack allocation for each one. + if (Align <= StackAlign) + Align = 0; + // Inform the Frame Information that we have variable-sized objects. + MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, AI); + } + } + + // Look for inline asm that clobbers the SP register. + if (isa<CallInst>(I) || isa<InvokeInst>(I)) { + ImmutableCallSite CS(&I); + if (isa<InlineAsm>(CS.getCalledValue())) { + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + std::vector<TargetLowering::AsmOperandInfo> Ops = + TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, CS); + for (TargetLowering::AsmOperandInfo &Op : Ops) { + if (Op.Type == InlineAsm::isClobber) { + // Clobbers don't have SDValue operands, hence SDValue(). + TLI->ComputeConstraintToUse(Op, SDValue(), DAG); + std::pair<unsigned, const TargetRegisterClass *> PhysReg = + TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode, + Op.ConstraintVT); + if (PhysReg.first == SP) + MF->getFrameInfo().setHasOpaqueSPAdjustment(true); + } + } + } + } + + // Look for calls to the @llvm.va_start intrinsic. We can omit some + // prologue boilerplate for variadic functions that don't examine their + // arguments. + if (const auto *II = dyn_cast<IntrinsicInst>(&I)) { + if (II->getIntrinsicID() == Intrinsic::vastart) + MF->getFrameInfo().setHasVAStart(true); + } + + // If we have a musttail call in a variadic function, we need to ensure we + // forward implicit register parameters. + if (const auto *CI = dyn_cast<CallInst>(&I)) { + if (CI->isMustTailCall() && Fn->isVarArg()) + MF->getFrameInfo().setHasMustTailInVarArgFunc(true); + } + + // Mark values used outside their block as exported, by allocating + // a virtual register for them. + if (isUsedOutsideOfDefiningBlock(&I)) + if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(&I))) + InitializeRegForValue(&I); + + // Decide the preferred extend type for a value. + PreferredExtendType[&I] = getPreferredExtendForValue(&I); + } + } + + // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This + // also creates the initial PHI MachineInstrs, though none of the input + // operands are populated. + for (const BasicBlock &BB : *Fn) { + // Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks + // are really data, and no instructions can live here. + if (BB.isEHPad()) { + const Instruction *PadInst = BB.getFirstNonPHI(); + // If this is a non-landingpad EH pad, mark this function as using + // funclets. + // FIXME: SEH catchpads do not create EH scope/funclets, so we could avoid + // setting this in such cases in order to improve frame layout. 
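+ // The non-landingpad EH pads are the catchswitch, catchpad and
+ // cleanuppad instructions.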
+ if (!isa<LandingPadInst>(PadInst)) { + MF->setHasEHScopes(true); + MF->setHasEHFunclets(true); + MF->getFrameInfo().setHasOpaqueSPAdjustment(true); + } + if (isa<CatchSwitchInst>(PadInst)) { + assert(&*BB.begin() == PadInst && + "WinEHPrepare failed to remove PHIs from imaginary BBs"); + continue; + } + if (isa<FuncletPadInst>(PadInst)) + assert(&*BB.begin() == PadInst && "WinEHPrepare failed to demote PHIs"); + } + + MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&BB); + MBBMap[&BB] = MBB; + MF->push_back(MBB); + + // Transfer the address-taken flag. This is necessary because there could + // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only + // the first one should be marked. + if (BB.hasAddressTaken()) + MBB->setHasAddressTaken(); + + // Mark landing pad blocks. + if (BB.isEHPad()) + MBB->setIsEHPad(); + + // Create Machine PHI nodes for LLVM PHI nodes, lowering them as + // appropriate. + for (const PHINode &PN : BB.phis()) { + if (PN.use_empty()) + continue; + + // Skip empty types + if (PN.getType()->isEmptyTy()) + continue; + + DebugLoc DL = PN.getDebugLoc(); + unsigned PHIReg = ValueMap[&PN]; + assert(PHIReg && "PHI node does not have an assigned virtual register!"); + + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(*TLI, MF->getDataLayout(), PN.getType(), ValueVTs); + for (EVT VT : ValueVTs) { + unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + for (unsigned i = 0; i != NumRegisters; ++i) + BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i); + PHIReg += NumRegisters; + } + } + } + + if (isFuncletEHPersonality(Personality)) { + WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); + + // Map all BB references in the WinEH data to MBBs. + for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { + for (WinEHHandlerType &H : TBME.HandlerArray) { + if (H.Handler) + H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()]; + } + } + for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap) + if (UME.Cleanup) + UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()]; + for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) { + const auto *BB = UME.Handler.get<const BasicBlock *>(); + UME.Handler = MBBMap[BB]; + } + for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) { + const auto *BB = CME.Handler.get<const BasicBlock *>(); + CME.Handler = MBBMap[BB]; + } + } + + else if (Personality == EHPersonality::Wasm_CXX) { + WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); + // Map all BB references in the WinEH data to MBBs. + DenseMap<BBOrMBB, BBOrMBB> NewMap; + for (auto &KV : EHInfo.EHPadUnwindMap) { + const auto *Src = KV.first.get<const BasicBlock *>(); + const auto *Dst = KV.second.get<const BasicBlock *>(); + NewMap[MBBMap[Src]] = MBBMap[Dst]; + } + EHInfo.EHPadUnwindMap = std::move(NewMap); + } +} + +/// clear - Clear out all the function-specific state. This returns this +/// FunctionLoweringInfo to an empty state, ready to be used for a +/// different function. +void FunctionLoweringInfo::clear() { + MBBMap.clear(); + ValueMap.clear(); + VirtReg2Value.clear(); + StaticAllocaMap.clear(); + LiveOutRegInfo.clear(); + VisitedBBs.clear(); + ArgDbgValues.clear(); + DescribedArgs.clear(); + ByValArgFrameIndexMap.clear(); + RegFixups.clear(); + RegsWithFixups.clear(); + StatepointStackSlots.clear(); + StatepointSpillMaps.clear(); + PreferredExtendType.clear(); +} + +/// CreateReg - Allocate a single virtual register for the given type. 
+unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { + return RegInfo->createVirtualRegister( + MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent)); +} + +/// CreateRegs - Allocate the appropriate number of virtual registers of +/// the correctly promoted or expanded types. Assign these registers +/// consecutive vreg numbers and return the first assigned number. +/// +/// In the case that the given value has struct or array type, this function +/// will assign registers for each member or element. +/// +unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) { + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); + + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs); + + unsigned FirstReg = 0; + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + MVT RegisterVT = TLI->getRegisterType(Ty->getContext(), ValueVT); + + unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) { + unsigned R = CreateReg(RegisterVT, isDivergent); + if (!FirstReg) FirstReg = R; + } + } + return FirstReg; +} + +unsigned FunctionLoweringInfo::CreateRegs(const Value *V) { + return CreateRegs(V->getType(), DA && !TLI->requiresUniformRegister(*MF, V) && + DA->isDivergent(V)); +} + +/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the +/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If +/// the register's LiveOutInfo is for a smaller bit width, it is extended to +/// the larger bit width by zero extension. The bit width must be no smaller +/// than the LiveOutInfo's existing bit width. +const FunctionLoweringInfo::LiveOutInfo * +FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { + if (!LiveOutRegInfo.inBounds(Reg)) + return nullptr; + + LiveOutInfo *LOI = &LiveOutRegInfo[Reg]; + if (!LOI->IsValid) + return nullptr; + + if (BitWidth > LOI->Known.getBitWidth()) { + LOI->NumSignBits = 1; + LOI->Known = LOI->Known.zext(BitWidth, false /* => any extend */); + } + + return LOI; +} + +/// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination +/// register based on the LiveOutInfo of its operands. 
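+/// For example, if every incoming value has its upper bits known zero, the
+/// destination inherits that fact; the result is the intersection of the
+/// known bits (and the minimum of the sign-bit counts) across all operands.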
+void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
+ Type *Ty = PN->getType();
+ if (!Ty->isIntegerTy() || Ty->isVectorTy())
+ return;
+
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs);
+ assert(ValueVTs.size() == 1 &&
+ "PHIs with non-vector integer types should have a single VT.");
+ EVT IntVT = ValueVTs[0];
+
+ if (TLI->getNumRegisters(PN->getContext(), IntVT) != 1)
+ return;
+ IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT);
+ unsigned BitWidth = IntVT.getSizeInBits();
+
+ unsigned DestReg = ValueMap[PN];
+ if (!Register::isVirtualRegister(DestReg))
+ return;
+ LiveOutRegInfo.grow(DestReg);
+ LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
+
+ Value *V = PN->getIncomingValue(0);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ DestLOI.Known = KnownBits(BitWidth);
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ DestLOI.NumSignBits = Val.getNumSignBits();
+ DestLOI.Known.Zero = ~Val;
+ DestLOI.Known.One = Val;
+ } else {
+ assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+ "its CopyToReg node was created.");
+ unsigned SrcReg = ValueMap[V];
+ if (!Register::isVirtualRegister(SrcReg)) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI = *SrcLOI;
+ }
+
+ assert(DestLOI.Known.Zero.getBitWidth() == BitWidth &&
+ DestLOI.Known.One.getBitWidth() == BitWidth &&
+ "Masks should have the same bit width as the type.");
+
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ DestLOI.Known = KnownBits(BitWidth);
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
+ DestLOI.Known.Zero &= ~Val;
+ DestLOI.Known.One &= Val;
+ continue;
+ }
+
+ assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+ "its CopyToReg node was created.");
+ unsigned SrcReg = ValueMap[V];
+ if (!Register::isVirtualRegister(SrcReg)) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
+ DestLOI.Known.Zero &= SrcLOI->Known.Zero;
+ DestLOI.Known.One &= SrcLOI->Known.One;
+ }
+}
+
+/// setArgumentFrameIndex - Record frame index for the byval
+/// argument. This overrides the previous frame index entry for this argument,
+/// if any.
+void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A,
+ int FI) {
+ ByValArgFrameIndexMap[A] = FI;
+}
+
+/// getArgumentFrameIndex - Get frame index for the byval argument.
+/// If the argument does not have any assigned frame index then INT_MAX is
+/// returned.
+int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { + auto I = ByValArgFrameIndexMap.find(A); + if (I != ByValArgFrameIndexMap.end()) + return I->second; + LLVM_DEBUG(dbgs() << "Argument does not have assigned frame index!\n"); + return INT_MAX; +} + +unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg( + const Value *CPI, const TargetRegisterClass *RC) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + auto I = CatchPadExceptionPointers.insert({CPI, 0}); + unsigned &VReg = I.first->second; + if (I.second) + VReg = MRI.createVirtualRegister(RC); + assert(VReg && "null vreg in exception pointer table!"); + return VReg; +} + +const Value * +FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) { + if (VirtReg2Value.empty()) { + SmallVector<EVT, 4> ValueVTs; + for (auto &P : ValueMap) { + ValueVTs.clear(); + ComputeValueVTs(*TLI, Fn->getParent()->getDataLayout(), + P.first->getType(), ValueVTs); + unsigned Reg = P.second; + for (EVT VT : ValueVTs) { + unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT); + for (unsigned i = 0, e = NumRegisters; i != e; ++i) + VirtReg2Value[Reg++] = P.first; + } + } + } + return VirtReg2Value.lookup(Vreg); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp new file mode 100644 index 0000000000000..c5095995ec2e0 --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -0,0 +1,1165 @@ +//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This implements the Emit routines for the SelectionDAG class, which creates +// MachineInstrs based on the decisions of the SelectionDAG instruction +// selection. +// +//===----------------------------------------------------------------------===// + +#include "InstrEmitter.h" +#include "SDNodeDbgValue.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +#define DEBUG_TYPE "instr-emitter" + +/// MinRCSize - Smallest register class we allow when constraining virtual +/// registers. If satisfying all register class constraints would require +/// using a smaller register class, emit a COPY to a new virtual register +/// instead. +const unsigned MinRCSize = 4; + +/// CountResults - The results of target nodes have register or immediate +/// operands first, then an optional chain, and optional glue operands (which do +/// not go into the resulting MachineInstr). +unsigned InstrEmitter::CountResults(SDNode *Node) { + unsigned N = Node->getNumValues(); + while (N && Node->getValueType(N - 1) == MVT::Glue) + --N; + if (N && Node->getValueType(N - 1) == MVT::Other) + --N; // Skip over chain result. 
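+ // What remains are the real register/immediate results.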
+ return N;
+}
+
+/// countOperands - The inputs to target nodes have any actual inputs first,
+/// followed by an optional chain operand, then an optional glue operand.
+/// Compute the number of actual operands that will go into the resulting
+/// MachineInstr.
+///
+/// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding
+/// the chain and glue. These operands may be implicit on the machine instr.
+static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
+ unsigned &NumImpUses) {
+ unsigned N = Node->getNumOperands();
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
+ --N;
+ if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+ --N; // Ignore chain if it exists.
+
+ // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses.
+ NumImpUses = N - NumExpUses;
+ for (unsigned I = N; I > NumExpUses; --I) {
+ if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
+ continue;
+ if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
+ if (Register::isPhysicalRegister(RN->getReg()))
+ continue;
+ NumImpUses = N - I;
+ break;
+ }
+
+ return N;
+}
+
+/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+/// implicit physical register output.
+void InstrEmitter::
+EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
+ unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VRBase = 0;
+ if (Register::isVirtualRegister(SrcReg)) {
+ // Just use the input register directly!
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ return;
+ }
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ bool MatchReg = true;
+ const TargetRegisterClass *UseRC = nullptr;
+ MVT VT = Node->getSimpleValueType(ResNo);
+
+ // Stick to the preferred register classes for legal types.
+ if (TLI->isTypeLegal(VT))
+ UseRC = TLI->getRegClassFor(VT, Node->isDivergent());
+
+ if (!IsClone && !IsCloned)
+ for (SDNode *User : Node->uses()) {
+ bool Match = true;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (Register::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ Match = false;
+ } else if (DestReg != SrcReg)
+ Match = false;
+ } else {
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ SDValue Op = User->getOperand(i);
+ if (Op.getNode() != Node || Op.getResNo() != ResNo)
+ continue;
+ MVT VT = Node->getSimpleValueType(Op.getResNo());
+ if (VT == MVT::Other || VT == MVT::Glue)
+ continue;
+ Match = false;
+ if (User->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(User->getMachineOpcode());
+ const TargetRegisterClass *RC = nullptr;
+ if (i+II.getNumDefs() < II.getNumOperands()) {
+ RC = TRI->getAllocatableClass(
+ TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF));
+ }
+ if (!UseRC)
+ UseRC = RC;
+ else if (RC) {
+ const TargetRegisterClass *ComRC =
+ TRI->getCommonSubClass(UseRC, RC);
+ // If multiple uses expect disjoint register classes, we emit
+ // copies in AddRegisterOperand.
+ if (ComRC) + UseRC = ComRC; + } + } + } + } + MatchReg &= Match; + if (VRBase) + break; + } + + const TargetRegisterClass *SrcRC = nullptr, *DstRC = nullptr; + SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); + + // Figure out the register class to create for the destreg. + if (VRBase) { + DstRC = MRI->getRegClass(VRBase); + } else if (UseRC) { + assert(TRI->isTypeLegalForClass(*UseRC, VT) && + "Incompatible phys register def and uses!"); + DstRC = UseRC; + } else { + DstRC = TLI->getRegClassFor(VT, Node->isDivergent()); + } + + // If all uses are reading from the src physical register and copying the + // register is either impossible or very expensive, then don't create a copy. + if (MatchReg && SrcRC->getCopyCost() < 0) { + VRBase = SrcReg; + } else { + // Create the reg, emit the copy. + VRBase = MRI->createVirtualRegister(DstRC); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + VRBase).addReg(SrcReg); + } + + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +void InstrEmitter::CreateVirtualRegisters(SDNode *Node, + MachineInstrBuilder &MIB, + const MCInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && + "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + + unsigned NumResults = CountResults(Node); + for (unsigned i = 0; i < II.getNumDefs(); ++i) { + // If the specific node value is only used by a CopyToReg and the dest reg + // is a vreg in the same register class, use the CopyToReg'd destination + // register instead of creating a new vreg. + unsigned VRBase = 0; + const TargetRegisterClass *RC = + TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); + // Always let the value type influence the used register class. The + // constraints on the instruction may be too lax to represent the value + // type correctly. For example, a 64-bit float (X86::FR64) can't live in + // the 32-bit float super-class (X86::FR32). + if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) { + const TargetRegisterClass *VTRC = TLI->getRegClassFor( + Node->getSimpleValueType(i), + (Node->isDivergent() || (RC && TRI->isDivergentRegClass(RC)))); + if (RC) + VTRC = TRI->getCommonSubClass(RC, VTRC); + if (VTRC) + RC = VTRC; + } + + if (II.OpInfo[i].isOptionalDef()) { + // Optional def must be a physical register. + VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg(); + assert(Register::isPhysicalRegister(VRBase)); + MIB.addReg(VRBase, RegState::Define); + } + + if (!VRBase && !IsClone && !IsCloned) + for (SDNode *User : Node->uses()) { + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == i) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (Register::isVirtualRegister(Reg)) { + const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); + if (RegRC == RC) { + VRBase = Reg; + MIB.addReg(VRBase, RegState::Define); + break; + } + } + } + } + + // Create the result registers for this node and add the result regs to + // the machine instruction. 
+ if (VRBase == 0) { + assert(RC && "Isn't a register operand!"); + VRBase = MRI->createVirtualRegister(RC); + MIB.addReg(VRBase, RegState::Define); + } + + // If this def corresponds to a result of the SDNode insert the VRBase into + // the lookup map. + if (i < NumResults) { + SDValue Op(Node, i); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + } + } +} + +/// getVR - Return the virtual register corresponding to the specified result +/// of the specified node. +unsigned InstrEmitter::getVR(SDValue Op, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Op.isMachineOpcode() && + Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { + // Add an IMPLICIT_DEF instruction before every use. + // IMPLICIT_DEF can produce any type of result so its MCInstrDesc + // does not include operand register class info. + const TargetRegisterClass *RC = TLI->getRegClassFor( + Op.getSimpleValueType(), Op.getNode()->isDivergent()); + Register VReg = MRI->createVirtualRegister(RC); + BuildMI(*MBB, InsertPos, Op.getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), VReg); + return VReg; + } + + DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + assert(I != VRBaseMap.end() && "Node emitted out of order - late"); + return I->second; +} + + +/// AddRegisterOperand - Add the specified register as an operand to the +/// specified machine instr. Insert register copies if the register is +/// not in the required register class. +void +InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, + SDValue Op, + unsigned IIOpNum, + const MCInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned) { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Glue && + "Chain and glue operands should occur at end of operand list!"); + // Get/emit the operand. + unsigned VReg = getVR(Op, VRBaseMap); + + const MCInstrDesc &MCID = MIB->getDesc(); + bool isOptDef = IIOpNum < MCID.getNumOperands() && + MCID.OpInfo[IIOpNum].isOptionalDef(); + + // If the instruction requires a register in a different class, create + // a new virtual register and copy the value into it, but first attempt to + // shrink VReg's register class within reason. For example, if VReg == GR32 + // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP. + if (II) { + const TargetRegisterClass *OpRC = nullptr; + if (IIOpNum < II->getNumOperands()) + OpRC = TII->getRegClass(*II, IIOpNum, TRI, *MF); + + if (OpRC) { + const TargetRegisterClass *ConstrainedRC + = MRI->constrainRegClass(VReg, OpRC, MinRCSize); + if (!ConstrainedRC) { + OpRC = TRI->getAllocatableClass(OpRC); + assert(OpRC && "Constraints cannot be fulfilled for allocation"); + Register NewVReg = MRI->createVirtualRegister(OpRC); + BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); + VReg = NewVReg; + } else { + assert(ConstrainedRC->isAllocatable() && + "Constraining an allocatable VReg produced an unallocatable class?"); + } + } + } + + // If this value has only one use, that use is a kill. This is a + // conservative approximation. InstrEmitter does trivial coalescing + // with CopyFromReg nodes, so don't emit kill flags for them. + // Avoid kill flags on Schedule cloned nodes, since there will be + // multiple uses. + // Tied operands are never killed, so we need to check that. 
And that + // means we need to determine the index of the operand. + bool isKill = Op.hasOneUse() && + Op.getNode()->getOpcode() != ISD::CopyFromReg && + !IsDebug && + !(IsClone || IsCloned); + if (isKill) { + unsigned Idx = MIB->getNumOperands(); + while (Idx > 0 && + MIB->getOperand(Idx-1).isReg() && + MIB->getOperand(Idx-1).isImplicit()) + --Idx; + bool isTied = MCID.getOperandConstraint(Idx, MCOI::TIED_TO) != -1; + if (isTied) + isKill = false; + } + + MIB.addReg(VReg, getDefRegState(isOptDef) | getKillRegState(isKill) | + getDebugRegState(IsDebug)); +} + +/// AddOperand - Add the specified operand to the specified machine instr. II +/// specifies the instruction information for the node, and IIOpNum is the +/// operand number (in the II) that we are adding. +void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, + SDValue Op, + unsigned IIOpNum, + const MCInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned) { + if (Op.isMachineOpcode()) { + AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, + IsDebug, IsClone, IsCloned); + } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + MIB.addImm(C->getSExtValue()); + } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) { + MIB.addFPImm(F->getConstantFPValue()); + } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { + unsigned VReg = R->getReg(); + MVT OpVT = Op.getSimpleValueType(); + const TargetRegisterClass *IIRC = + II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF)) + : nullptr; + const TargetRegisterClass *OpRC = + TLI->isTypeLegal(OpVT) + ? TLI->getRegClassFor(OpVT, + Op.getNode()->isDivergent() || + (IIRC && TRI->isDivergentRegClass(IIRC))) + : nullptr; + + if (OpRC && IIRC && OpRC != IIRC && Register::isVirtualRegister(VReg)) { + Register NewVReg = MRI->createVirtualRegister(IIRC); + BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); + VReg = NewVReg; + } + // Turn additional physreg operands into implicit uses on non-variadic + // instructions. This is used by call and return instructions passing + // arguments in registers. + bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic()); + MIB.addReg(VReg, getImplRegState(Imp)); + } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) { + MIB.addRegMask(RM->getRegMask()); + } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { + MIB.addGlobalAddress(TGA->getGlobal(), TGA->getOffset(), + TGA->getTargetFlags()); + } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) { + MIB.addMBB(BBNode->getBasicBlock()); + } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) { + MIB.addFrameIndex(FI->getIndex()); + } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) { + MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags()); + } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) { + int Offset = CP->getOffset(); + unsigned Align = CP->getAlignment(); + Type *Type = CP->getType(); + // MachineConstantPool wants an explicit alignment. + if (Align == 0) { + Align = MF->getDataLayout().getPrefTypeAlignment(Type); + if (Align == 0) { + // Alignment of vector types. FIXME! 
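+ // (E.g. a <4 x i32> constant falls back to its 16-byte alloc size here.)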
+ Align = MF->getDataLayout().getTypeAllocSize(Type); + } + } + + unsigned Idx; + MachineConstantPool *MCP = MF->getConstantPool(); + if (CP->isMachineConstantPoolEntry()) + Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align); + else + Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align); + MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags()); + } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) { + MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags()); + } else if (auto *SymNode = dyn_cast<MCSymbolSDNode>(Op)) { + MIB.addSym(SymNode->getMCSymbol()); + } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) { + MIB.addBlockAddress(BA->getBlockAddress(), + BA->getOffset(), + BA->getTargetFlags()); + } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) { + MIB.addTargetIndex(TI->getIndex(), TI->getOffset(), TI->getTargetFlags()); + } else { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Glue && + "Chain and glue operands should occur at end of operand list!"); + AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, + IsDebug, IsClone, IsCloned); + } +} + +unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, + MVT VT, bool isDivergent, const DebugLoc &DL) { + const TargetRegisterClass *VRC = MRI->getRegClass(VReg); + const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx); + + // RC is a sub-class of VRC that supports SubIdx. Try to constrain VReg + // within reason. + if (RC && RC != VRC) + RC = MRI->constrainRegClass(VReg, RC, MinRCSize); + + // VReg has been adjusted. It can be used with SubIdx operands now. + if (RC) + return VReg; + + // VReg couldn't be reasonably constrained. Emit a COPY to a new virtual + // register instead. + RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx); + assert(RC && "No legal register class for VT supports that SubIdx"); + Register NewReg = MRI->createVirtualRegister(RC); + BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg) + .addReg(VReg); + return NewReg; +} + +/// EmitSubregNode - Generate machine code for subreg nodes. +/// +void InstrEmitter::EmitSubregNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned) { + unsigned VRBase = 0; + unsigned Opc = Node->getMachineOpcode(); + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. + for (SDNode *User : Node->uses()) { + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node) { + unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (Register::isVirtualRegister(DestReg)) { + VRBase = DestReg; + break; + } + } + } + + if (Opc == TargetOpcode::EXTRACT_SUBREG) { + // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no + // constraints on the %dst register, COPY can target all legal register + // classes. + unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + const TargetRegisterClass *TRC = + TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent()); + + unsigned Reg; + MachineInstr *DefMI; + RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0)); + if (R && Register::isPhysicalRegister(R->getReg())) { + Reg = R->getReg(); + DefMI = nullptr; + } else { + Reg = R ? 
R->getReg() : getVR(Node->getOperand(0), VRBaseMap); + DefMI = MRI->getVRegDef(Reg); + } + + unsigned SrcReg, DstReg, DefSubIdx; + if (DefMI && + TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && + SubIdx == DefSubIdx && + TRC == MRI->getRegClass(SrcReg)) { + // Optimize these: + // r1025 = s/zext r1024, 4 + // r1026 = extract_subreg r1025, 4 + // to a copy + // r1026 = copy r1024 + VRBase = MRI->createVirtualRegister(TRC); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); + MRI->clearKillFlags(SrcReg); + } else { + // Reg may not support a SubIdx sub-register, and we may need to + // constrain its register class or issue a COPY to a compatible register + // class. + if (Register::isVirtualRegister(Reg)) + Reg = ConstrainForSubReg(Reg, SubIdx, + Node->getOperand(0).getSimpleValueType(), + Node->isDivergent(), Node->getDebugLoc()); + // Create the destreg if it is missing. + if (VRBase == 0) + VRBase = MRI->createVirtualRegister(TRC); + + // Create the extract_subreg machine instruction. + MachineInstrBuilder CopyMI = + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase); + if (Register::isVirtualRegister(Reg)) + CopyMI.addReg(Reg, 0, SubIdx); + else + CopyMI.addReg(TRI->getSubReg(Reg, SubIdx)); + } + } else if (Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG) { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); + + // Figure out the register class to create for the destreg. It should be + // the largest legal register class supporting SubIdx sub-registers. + // RegisterCoalescer will constrain it further if it decides to eliminate + // the INSERT_SUBREG instruction. + // + // %dst = INSERT_SUBREG %src, %sub, SubIdx + // + // is lowered by TwoAddressInstructionPass to: + // + // %dst = COPY %src + // %dst:SubIdx = COPY %sub + // + // There is no constraint on the %src register class. + // + const TargetRegisterClass *SRC = + TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent()); + SRC = TRI->getSubClassWithSubReg(SRC, SubIdx); + assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG"); + + if (VRBase == 0 || !SRC->hasSubClassEq(MRI->getRegClass(VRBase))) + VRBase = MRI->createVirtualRegister(SRC); + + // Create the insert_subreg or subreg_to_reg machine instruction. + MachineInstrBuilder MIB = + BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc), VRBase); + + // If creating a subreg_to_reg, then the first input operand + // is an implicit value immediate, otherwise it's a register + if (Opc == TargetOpcode::SUBREG_TO_REG) { + const ConstantSDNode *SD = cast<ConstantSDNode>(N0); + MIB.addImm(SD->getZExtValue()); + } else + AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); + // Add the subregister being inserted + AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); + MIB.addImm(SubIdx); + MBB->insert(InsertPos, MIB); + } else + llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg"); + + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. 
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination +/// register is constrained to be in a particular register class. +/// +void +InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + + // Create the new VReg in the destination class and emit a copy. + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + const TargetRegisterClass *DstRC = + TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx)); + Register NewVReg = MRI->createVirtualRegister(DstRC); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + NewVReg).addReg(VReg); + + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. +/// +void InstrEmitter::EmitRegSequence(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned) { + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); + Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); + const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); + MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg); + unsigned NumOps = Node->getNumOperands(); + // If the input pattern has a chain, then the root of the corresponding + // output pattern will get a chain as well. This can happen to be a + // REG_SEQUENCE (which is not "guarded" by countOperands/CountResults). + if (NumOps && Node->getOperand(NumOps-1).getValueType() == MVT::Other) + --NumOps; // Ignore chain if it exists. + + assert((NumOps & 1) == 1 && + "REG_SEQUENCE must have an odd number of operands!"); + for (unsigned i = 1; i != NumOps; ++i) { + SDValue Op = Node->getOperand(i); + if ((i & 1) == 0) { + RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1)); + // Skip physical registers as they don't have a vreg to get and we'll + // insert copies for them in TwoAddressInstructionPass anyway. + if (!R || !Register::isPhysicalRegister(R->getReg())) { + unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); + unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = + TRI->getMatchingSuperRegClass(RC, TRC, SubIdx); + if (SRC && SRC != RC) { + MRI->setRegClass(NewVReg, SRC); + RC = SRC; + } + } + } + AddOperand(MIB, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); + } + + MBB->insert(InsertPos, MIB); + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitDbgValue - Generate machine instruction for a dbg_value node. 
+///
+MachineInstr *
+InstrEmitter::EmitDbgValue(SDDbgValue *SD,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ MDNode *Var = SD->getVariable();
+ const DIExpression *Expr = SD->getExpression();
+ DebugLoc DL = SD->getDebugLoc();
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+
+ SD->setIsEmitted();
+
+ if (SD->isInvalidated()) {
+ // An invalidated SDNode must generate an undef DBG_VALUE: although the
+ // original value is no longer computed, earlier DBG_VALUEs' live ranges
+ // must not leak into later code.
+ auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE));
+ MIB.addReg(0U);
+ MIB.addReg(0U, RegState::Debug);
+ MIB.addMetadata(Var);
+ MIB.addMetadata(Expr);
+ return &*MIB;
+ }
+
+ if (SD->getKind() == SDDbgValue::FRAMEIX) {
+ // Stack address; this needs to be lowered in target-dependent fashion.
+ // EmitTargetCodeForFrameDebugValue is responsible for allocation.
+ auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
+ .addFrameIndex(SD->getFrameIx());
+
+ if (SD->isIndirect())
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
+
+ FrameMI.addReg(0);
+ return FrameMI.addMetadata(Var).addMetadata(Expr);
+ }
+ // Otherwise, we're going to create an instruction here.
+ const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+ MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
+ if (SD->getKind() == SDDbgValue::SDNODE) {
+ SDNode *Node = SD->getSDNode();
+ SDValue Op = SDValue(Node, SD->getResNo());
+ // It's possible we replaced this SDNode with other(s) and therefore
+ // didn't generate code for it. It's better to catch these cases where
+ // they happen and transfer the debug info, but trying to guarantee that
+ // in all cases would be very fragile; this is a safeguard for any
+ // that were missed.
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ if (I == VRBaseMap.end())
+ MIB.addReg(0U); // undef
+ else
+ AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
+ /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
+ } else if (SD->getKind() == SDDbgValue::VREG) {
+ MIB.addReg(SD->getVReg(), RegState::Debug);
+ } else if (SD->getKind() == SDDbgValue::CONST) {
+ const Value *V = SD->getConst();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ MIB.addCImm(CI);
+ else
+ MIB.addImm(CI->getSExtValue());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ MIB.addFPImm(CF);
+ } else if (isa<ConstantPointerNull>(V)) {
+ // Note: This assumes that all nullptr constants are zero-valued.
+ MIB.addImm(0);
+ } else {
+ // Could be an Undef. In any case insert an Undef so we can see what we
+ // dropped.
+ MIB.addReg(0U);
+ }
+ } else {
+ // Insert an Undef so we can see what we dropped.
+ MIB.addReg(0U);
+ }
+
+ // Indirect addressing is indicated by an Imm as the second parameter.
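+ // (Note: in this emitter any indirection is instead folded into the
+ // DIExpression via DW_OP_deref, and the second operand added below is
+ // always a $noreg.)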
+ if (SD->isIndirect()) + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); + + MIB.addReg(0U, RegState::Debug); + + MIB.addMetadata(Var); + MIB.addMetadata(Expr); + + return &*MIB; +} + +MachineInstr * +InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) { + MDNode *Label = SD->getLabel(); + DebugLoc DL = SD->getDebugLoc(); + assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + + const MCInstrDesc &II = TII->get(TargetOpcode::DBG_LABEL); + MachineInstrBuilder MIB = BuildMI(*MF, DL, II); + MIB.addMetadata(Label); + + return &*MIB; +} + +/// EmitMachineNode - Generate machine code for a target-specific node and +/// needed dependencies. +/// +void InstrEmitter:: +EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned Opc = Node->getMachineOpcode(); + + // Handle subreg insert/extract specially + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG) { + EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned); + return; + } + + // Handle COPY_TO_REGCLASS specially. + if (Opc == TargetOpcode::COPY_TO_REGCLASS) { + EmitCopyToRegClassNode(Node, VRBaseMap); + return; + } + + // Handle REG_SEQUENCE specially. + if (Opc == TargetOpcode::REG_SEQUENCE) { + EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned); + return; + } + + if (Opc == TargetOpcode::IMPLICIT_DEF) + // We want a unique VR for each IMPLICIT_DEF use. + return; + + const MCInstrDesc &II = TII->get(Opc); + unsigned NumResults = CountResults(Node); + unsigned NumDefs = II.getNumDefs(); + const MCPhysReg *ScratchRegs = nullptr; + + // Handle STACKMAP and PATCHPOINT specially and then use the generic code. + if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { + // Stackmaps do not have arguments and do not preserve their calling + // convention. However, to simplify runtime support, they clobber the same + // scratch registers as AnyRegCC. + unsigned CC = CallingConv::AnyReg; + if (Opc == TargetOpcode::PATCHPOINT) { + CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); + NumDefs = NumResults; + } + ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC); + } + + unsigned NumImpUses = 0; + unsigned NodeOperands = + countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); + bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=nullptr; +#ifndef NDEBUG + unsigned NumMIOperands = NodeOperands + NumResults; + if (II.isVariadic()) + assert(NumMIOperands >= II.getNumOperands() && + "Too few operands for a variadic node!"); + else + assert(NumMIOperands >= II.getNumOperands() && + NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() + + NumImpUses && + "#operands for dag node doesn't match .td file!"); +#endif + + // Create the new machine instruction. + MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II); + + // Add result register values for things that are defined by this + // instruction. 
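+ // (CreateVirtualRegisters below may reuse a CopyToReg destination vreg;
+ // the node's IR-level FP and wrap flags are then mirrored onto the new
+ // MachineInstr.)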
+ if (NumResults) { + CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap); + + // Transfer any IR flags from the SDNode to the MachineInstr + MachineInstr *MI = MIB.getInstr(); + const SDNodeFlags Flags = Node->getFlags(); + if (Flags.hasNoSignedZeros()) + MI->setFlag(MachineInstr::MIFlag::FmNsz); + + if (Flags.hasAllowReciprocal()) + MI->setFlag(MachineInstr::MIFlag::FmArcp); + + if (Flags.hasNoNaNs()) + MI->setFlag(MachineInstr::MIFlag::FmNoNans); + + if (Flags.hasNoInfs()) + MI->setFlag(MachineInstr::MIFlag::FmNoInfs); + + if (Flags.hasAllowContract()) + MI->setFlag(MachineInstr::MIFlag::FmContract); + + if (Flags.hasApproximateFuncs()) + MI->setFlag(MachineInstr::MIFlag::FmAfn); + + if (Flags.hasAllowReassociation()) + MI->setFlag(MachineInstr::MIFlag::FmReassoc); + + if (Flags.hasNoUnsignedWrap()) + MI->setFlag(MachineInstr::MIFlag::NoUWrap); + + if (Flags.hasNoSignedWrap()) + MI->setFlag(MachineInstr::MIFlag::NoSWrap); + + if (Flags.hasExact()) + MI->setFlag(MachineInstr::MIFlag::IsExact); + + if (Flags.hasFPExcept()) + MI->setFlag(MachineInstr::MIFlag::FPExcept); + } + + // Emit all of the actual operands of this instruction, adding them to the + // instruction as appropriate. + bool HasOptPRefs = NumDefs > NumResults; + assert((!HasOptPRefs || !HasPhysRegOuts) && + "Unable to cope with optional defs and phys regs defs!"); + unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0; + for (unsigned i = NumSkip; i != NodeOperands; ++i) + AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II, + VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); + + // Add scratch registers as implicit def and early clobber + if (ScratchRegs) + for (unsigned i = 0; ScratchRegs[i]; ++i) + MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine | + RegState::EarlyClobber); + + // Set the memory reference descriptions of this instruction now that it is + // part of the function. + MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands()); + + // Insert the instruction into position in the block. This needs to + // happen before any custom inserter hook is called so that the + // hook knows where in the block to insert the replacement code. + MBB->insert(InsertPos, MIB); + + // The MachineInstr may also define physregs instead of virtregs. These + // physreg values can reach other instructions in different ways: + // + // 1. When there is a use of a Node value beyond the explicitly defined + // virtual registers, we emit a CopyFromReg for one of the implicitly + // defined physregs. This only happens when HasPhysRegOuts is true. + // + // 2. A CopyFromReg reading a physreg may be glued to this instruction. + // + // 3. A glued instruction may implicitly use a physreg. + // + // 4. A glued instruction may use a RegisterSDNode operand. + // + // Collect all the used physreg defs, and make sure that any unused physreg + // defs are marked as dead. + SmallVector<Register, 8> UsedRegs; + + // Additional results must be physical register defs. + if (HasPhysRegOuts) { + for (unsigned i = NumDefs; i < NumResults; ++i) { + Register Reg = II.getImplicitDefs()[i - NumDefs]; + if (!Node->hasAnyUseOfValue(i)) + continue; + // This implicitly defined physreg has a use. + UsedRegs.push_back(Reg); + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); + } + } + + // Scan the glue chain for any used physregs. 
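+ // E.g. a call glued to a CopyFromReg of $eax counts $eax as used here
+ // rather than dead ($eax is just an illustrative register).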
+ if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
+ if (F->getOpcode() == ISD::CopyFromReg) {
+ UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+ continue;
+ } else if (F->getOpcode() == ISD::CopyToReg) {
+ // Skip CopyToReg nodes that are internal to the glue chain.
+ continue;
+ }
+ // Collect declared implicit uses.
+ const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+ UsedRegs.append(MCID.getImplicitUses(),
+ MCID.getImplicitUses() + MCID.getNumImplicitUses());
+ // In addition to declared implicit uses, we must also check for
+ // direct RegisterSDNode operands.
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+ Register Reg = R->getReg();
+ if (Reg.isPhysical())
+ UsedRegs.push_back(Reg);
+ }
+ }
+ }
+
+ // Finally mark unused registers as dead.
+ if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef())
+ MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
+
+ // Run post-isel target hook to adjust this instruction if needed.
+ if (II.hasPostISelHook())
+ TLI->AdjustInstrPostInstrSelection(*MIB, Node);
+}
+
+/// EmitSpecialNode - Generate machine code for a target-independent node and
+/// needed dependencies.
+void InstrEmitter::
+EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ Node->dump();
+#endif
+ llvm_unreachable("This target-independent node should have been selected!");
+ case ISD::EntryToken:
+ llvm_unreachable("EntryToken should have been excluded from the schedule!");
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor: // fall thru
+ break;
+ case ISD::CopyToReg: {
+ unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ SDValue SrcVal = Node->getOperand(2);
+ if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() &&
+ SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ // Instead of building a COPY to that vreg destination, build an
+ // IMPLICIT_DEF instruction.
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+ break;
+ }
+ unsigned SrcReg;
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
+ SrcReg = R->getReg();
+ else
+ SrcReg = getVR(SrcVal, VRBaseMap);
+
+ if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
+ break;
+
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ DestReg).addReg(SrcReg);
+ break;
+ }
+ case ISD::CopyFromReg: {
+ unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
+ break;
+ }
+ case ISD::EH_LABEL:
+ case ISD::ANNOTATION_LABEL: {
+ unsigned Opc = (Node->getOpcode() == ISD::EH_LABEL)
+ ? TargetOpcode::EH_LABEL
+ : TargetOpcode::ANNOTATION_LABEL;
+ MCSymbol *S = cast<LabelSDNode>(Node)->getLabel();
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(Opc)).addSym(S);
+ break;
+ }
+
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END: {
+ unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ?
+ TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END;
+
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1));
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
+ .addFrameIndex(FI->getIndex());
+ break;
+ }
+
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ // Create the inline asm machine instruction.
+ unsigned TgtOpc = Node->getOpcode() == ISD::INLINEASM_BR
+ ? TargetOpcode::INLINEASM_BR
+ : TargetOpcode::INLINEASM;
+ MachineInstrBuilder MIB =
+ BuildMI(*MF, Node->getDebugLoc(), TII->get(TgtOpc));
+
+ // Add the asm string as an external symbol operand.
+ SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
+ const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
+ MIB.addExternalSymbol(AsmStr);
+
+ // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore
+ // bits.
+ int64_t ExtraInfo =
+ cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
+ getZExtValue();
+ MIB.addImm(ExtraInfo);
+
+ // Remember the operand index of the group flags.
+ SmallVector<unsigned, 8> GroupIdx;
+
+ // Remember registers that are part of early-clobber defs.
+ SmallVector<unsigned, 8> ECRegs;
+
+ // Add all of the operand registers to the instruction.
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ GroupIdx.push_back(MIB->getNumOperands());
+ MIB.addImm(Flags);
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default: llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegDef:
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ // FIXME: Add dead flags for physical and virtual registers defined.
+ // For now, mark physical register defs as implicit to help fast
+ // regalloc. This makes inline asm look a lot like calls.
+ MIB.addReg(Reg,
+ RegState::Define |
+ getImplRegState(Register::isPhysicalRegister(Reg)));
+ }
+ break;
+ case InlineAsm::Kind_RegDefEarlyClobber:
+ case InlineAsm::Kind_Clobber:
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MIB.addReg(Reg,
+ RegState::Define | RegState::EarlyClobber |
+ getImplRegState(Register::isPhysicalRegister(Reg)));
+ ECRegs.push_back(Reg);
+ }
+ break;
+ case InlineAsm::Kind_RegUse: // Use of register.
+ case InlineAsm::Kind_Imm: // Immediate.
+ case InlineAsm::Kind_Mem: // Addressing mode.
+ // The addressing mode has been selected; just add all of the
+ // operands to the machine instruction.
+ for (unsigned j = 0; j != NumVals; ++j, ++i)
+ AddOperand(MIB, Node->getOperand(i), 0, nullptr, VRBaseMap,
+ /*IsDebug=*/false, IsClone, IsCloned);
+
+ // Manually set isTied bits.
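+ // E.g. a "0" input constraint ties the use group back to def group 0;
+ // the loop below then ties the matching def/use operands pairwise.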
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) {
+ unsigned DefGroup = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) {
+ unsigned DefIdx = GroupIdx[DefGroup] + 1;
+ unsigned UseIdx = GroupIdx.back() + 1;
+ for (unsigned j = 0; j != NumVals; ++j)
+ MIB->tieOperands(DefIdx + j, UseIdx + j);
+ }
+ }
+ break;
+ }
+ }
+
+ // GCC inline assembly allows input operands to also be early-clobber
+ // output operands (so long as the operand is written only after it's
+ // used), but this does not match the semantics of our early-clobber flag.
+ // If an early-clobber operand register is also an input operand register,
+ // then remove the early-clobber flag.
+ for (unsigned Reg : ECRegs) {
+ if (MIB->readsRegister(Reg, TRI)) {
+ MachineOperand *MO =
+ MIB->findRegisterDefOperand(Reg, false, false, TRI);
+ assert(MO && "No def operand for clobbered register?");
+ MO->setIsEarlyClobber(false);
+ }
+ }
+
+ // Get the mdnode from the asm if it exists and add it to the instruction.
+ SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode);
+ const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
+ if (MD)
+ MIB.addMetadata(MD);
+
+ MBB->insert(InsertPos, MIB);
+ break;
+ }
+ }
+}
+
+/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+/// at the given position in the given block.
+InstrEmitter::InstrEmitter(MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator insertpos)
+ : MF(mbb->getParent()), MRI(&MF->getRegInfo()),
+ TII(MF->getSubtarget().getInstrInfo()),
+ TRI(MF->getSubtarget().getRegisterInfo()),
+ TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),
+ InsertPos(insertpos) {}
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
new file mode 100644
index 0000000000000..cfe99dd977b5b
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -0,0 +1,142 @@
+//===- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG -*- C++ -*--==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This declares the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+namespace llvm {
+
+class MachineInstrBuilder;
+class MCInstrDesc;
+class SDDbgValue;
+
+class LLVM_LIBRARY_VISIBILITY InstrEmitter {
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+
+ MachineBasicBlock *MBB;
+ MachineBasicBlock::iterator InsertPos;
+
+ /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+ /// implicit physical register output.
+ void EmitCopyFromReg(SDNode *Node, unsigned ResNo, + bool IsClone, bool IsCloned, + unsigned SrcReg, + DenseMap<SDValue, unsigned> &VRBaseMap); + + void CreateVirtualRegisters(SDNode *Node, + MachineInstrBuilder &MIB, + const MCInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// getVR - Return the virtual register corresponding to the specified result + /// of the specified node. + unsigned getVR(SDValue Op, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// AddRegisterOperand - Add the specified register as an operand to the + /// specified machine instr. Insert register copies if the register is + /// not in the required register class. + void AddRegisterOperand(MachineInstrBuilder &MIB, + SDValue Op, + unsigned IIOpNum, + const MCInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned); + + /// AddOperand - Add the specified operand to the specified machine instr. II + /// specifies the instruction information for the node, and IIOpNum is the + /// operand number (in the II) that we are adding. IIOpNum and II are used for + /// assertions only. + void AddOperand(MachineInstrBuilder &MIB, + SDValue Op, + unsigned IIOpNum, + const MCInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned); + + /// ConstrainForSubReg - Try to constrain VReg to a register class that + /// supports SubIdx sub-registers. Emit a copy if that isn't possible. + /// Return the virtual register to use. + unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT, + bool isDivergent, const DebugLoc &DL); + + /// EmitSubregNode - Generate machine code for subreg nodes. + /// + void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned); + + /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. + /// COPY_TO_REGCLASS is just a normal copy, except that the destination + /// register is constrained to be in a particular register class. + /// + void EmitCopyToRegClassNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. + /// + void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned); +public: + /// CountResults - The results of target nodes have register or immediate + /// operands first, then an optional chain, and optional flag operands + /// (which do not go into the machine instrs.) + static unsigned CountResults(SDNode *Node); + + /// EmitDbgValue - Generate machine instruction for a dbg_value node. + /// + MachineInstr *EmitDbgValue(SDDbgValue *SD, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// Generate machine instruction for a dbg_label node. + MachineInstr *EmitDbgLabel(SDDbgLabel *SD); + + /// EmitNode - Generate machine code for a node and needed dependencies. + /// + void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Node->isMachineOpcode()) + EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap); + else + EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap); + } + + /// getBlock - Return the current basic block. + MachineBasicBlock *getBlock() { return MBB; } + + /// getInsertPos - Return the current insertion position. + MachineBasicBlock::iterator getInsertPos() { return InsertPos; } + + /// InstrEmitter - Construct an InstrEmitter and set it to start inserting + /// at the given position in the given block. 
+ InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos); + +private: + void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap); + void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap); +}; + +} + +#endif diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp new file mode 100644 index 0000000000000..f9fdf525240fa --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -0,0 +1,4646 @@ +//===- LegalizeDAG.cpp - Implement SelectionDAG::Legalize -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::Legalize method. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <tuple> +#include <utility> + +using namespace llvm; + +#define DEBUG_TYPE "legalizedag" + +namespace { + +/// Keeps track of state when getting the sign of a floating-point value as an +/// integer. +struct FloatSignAsInt { + EVT FloatVT; + SDValue Chain; + SDValue FloatPtr; + SDValue IntPtr; + MachinePointerInfo IntPointerInfo; + MachinePointerInfo FloatPointerInfo; + SDValue IntValue; + APInt SignMask; + uint8_t SignBit; +}; + +//===----------------------------------------------------------------------===// +/// This takes an arbitrary SelectionDAG as input and +/// hacks on it until the target machine can handle it. This involves +/// eliminating value sizes the machine cannot handle (promoting small sizes to +/// large sizes or splitting up large values into small values) as well as +/// eliminating operations the machine cannot handle. +/// +/// This code also does a small amount of optimization and recognition of idioms +/// as part of its processing. 
For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support the 'brcc' instruction,
+/// this will attempt to merge setcc and branch instructions into brcc's.
+class SelectionDAGLegalize {
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+
+ /// The set of nodes which have already been legalized. We hold a
+ /// reference to it in order to update as necessary on node deletion.
+ SmallPtrSetImpl<SDNode *> &LegalizedNodes;
+
+ /// A set of all the nodes updated during legalization.
+ SmallSetVector<SDNode *, 16> *UpdatedNodes;
+
+ EVT getSetCCResultType(EVT VT) const {
+ return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ }
+
+ // Libcall insertion helpers.
+
+public:
+ SelectionDAGLegalize(SelectionDAG &DAG,
+ SmallPtrSetImpl<SDNode *> &LegalizedNodes,
+ SmallSetVector<SDNode *, 16> *UpdatedNodes = nullptr)
+ : TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG),
+ LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {}
+
+ /// Legalizes the given operation.
+ void LegalizeOp(SDNode *Node);
+
+private:
+ SDValue OptimizeFloatStore(StoreSDNode *ST);
+
+ void LegalizeLoadOps(SDNode *Node);
+ void LegalizeStoreOps(SDNode *Node);
+
+ /// Some targets cannot handle a variable
+ /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+ /// is necessary to spill the vector being inserted into to memory, perform
+ /// the insert there, and then read the result back.
+ SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+ const SDLoc &dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx,
+ const SDLoc &dl);
+
+ /// Return a vector shuffle operation which
+ /// performs the same shuffle in terms of order of result bytes, but on a type
+ /// whose vector element type is narrower than the original shuffle type.
+ /// e.g. 
<v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> + SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, const SDLoc &dl, + SDValue N1, SDValue N2, + ArrayRef<int> Mask) const; + + bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, + bool &NeedInvert, const SDLoc &dl); + + SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); + + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, bool isSigned); + SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128); + SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, + RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, + RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, + RTLIB::Libcall Call_I128); + SDValue ExpandArgFPLibCall(SDNode *Node, + RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128); + void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); + + SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, + const SDLoc &dl); + SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, + const SDLoc &dl, SDValue ChainIn); + SDValue ExpandBUILD_VECTOR(SDNode *Node); + SDValue ExpandSPLAT_VECTOR(SDNode *Node); + SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); + void ExpandDYNAMIC_STACKALLOC(SDNode *Node, + SmallVectorImpl<SDValue> &Results); + void getSignAsIntValue(FloatSignAsInt &State, const SDLoc &DL, + SDValue Value) const; + SDValue modifySignAsInt(const FloatSignAsInt &State, const SDLoc &DL, + SDValue NewIntValue) const; + SDValue ExpandFCOPYSIGN(SDNode *Node) const; + SDValue ExpandFABS(SDNode *Node) const; + SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, + const SDLoc &dl); + SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, + const SDLoc &dl); + SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, + const SDLoc &dl); + + SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl); + SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl); + + SDValue ExpandExtractFromVectorThroughStack(SDValue Op); + SDValue ExpandInsertToVectorThroughStack(SDValue Op); + SDValue ExpandVectorBuildThroughStack(SDNode* Node); + + SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + SDValue ExpandConstant(ConstantSDNode *CP); + + // if ExpandNode returns false, LegalizeOp falls back to ConvertNodeToLibcall + bool ExpandNode(SDNode *Node); + void ConvertNodeToLibcall(SDNode *Node); + void PromoteNode(SDNode *Node); + +public: + // Node replacement helpers + + void ReplacedNode(SDNode *N) { + LegalizedNodes.erase(N); + if (UpdatedNodes) + UpdatedNodes->insert(N); + } + + void ReplaceNode(SDNode *Old, SDNode *New) { + LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + + assert(Old->getNumValues() == New->getNumValues() && + "Replacing one node with another that produces a different number " + "of values!"); + DAG.ReplaceAllUsesWith(Old, New); + if (UpdatedNodes) + UpdatedNodes->insert(New); + ReplacedNode(Old); + } + + void ReplaceNode(SDValue Old, SDValue New) { + LLVM_DEBUG(dbgs() << " ... 
replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + + DAG.ReplaceAllUsesWith(Old, New); + if (UpdatedNodes) + UpdatedNodes->insert(New.getNode()); + ReplacedNode(Old.getNode()); + } + + void ReplaceNode(SDNode *Old, const SDValue *New) { + LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG)); + + DAG.ReplaceAllUsesWith(Old, New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { + LLVM_DEBUG(dbgs() << (i == 0 ? " with: " : " and: "); + New[i]->dump(&DAG)); + if (UpdatedNodes) + UpdatedNodes->insert(New[i].getNode()); + } + ReplacedNode(Old); + } + + void ReplaceNodeWithValue(SDValue Old, SDValue New) { + LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + + DAG.ReplaceAllUsesOfValueWith(Old, New); + if (UpdatedNodes) + UpdatedNodes->insert(New.getNode()); + ReplacedNode(Old.getNode()); + } +}; + +} // end anonymous namespace + +/// Return a vector shuffle operation which +/// performs the same shuffle in terms of order or result bytes, but on a type +/// whose vector element type is narrower than the original shuffle type. +/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> +SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType( + EVT NVT, EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, + ArrayRef<int> Mask) const { + unsigned NumMaskElts = VT.getVectorNumElements(); + unsigned NumDestElts = NVT.getVectorNumElements(); + unsigned NumEltsGrowth = NumDestElts / NumMaskElts; + + assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!"); + + if (NumEltsGrowth == 1) + return DAG.getVectorShuffle(NVT, dl, N1, N2, Mask); + + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumMaskElts; ++i) { + int Idx = Mask[i]; + for (unsigned j = 0; j != NumEltsGrowth; ++j) { + if (Idx < 0) + NewMask.push_back(-1); + else + NewMask.push_back(Idx * NumEltsGrowth + j); + } + } + assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?"); + assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?"); + return DAG.getVectorShuffle(NVT, dl, N1, N2, NewMask); +} + +/// Expands the ConstantFP node to an integer constant or +/// a load from the constant pool. +SDValue +SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { + bool Extend = false; + SDLoc dl(CFP); + + // If a FP immediate is precise when represented as a float and if the + // target can do an extending load from float to double, we put it into + // the constant pool as a float, even if it's is statically typed as a + // double. This shrinks FP constants and canonicalizes them for targets where + // an FP extending load is the same cost as a normal load (such as on the x87 + // fp stack or PPC FP unit). + EVT VT = CFP->getValueType(0); + ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue()); + if (!UseCP) { + assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion"); + return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), dl, + (VT == MVT::f64) ? MVT::i64 : MVT::i32); + } + + APFloat APF = CFP->getValueAPF(); + EVT OrigVT = VT; + EVT SVT = VT; + + // We don't want to shrink SNaNs. Converting the SNaN back to its real type + // can cause it to be changed into a QNaN on some platforms (e.g. on SystemZ). 
+ if (!APF.isSignaling()) {
+ while (SVT != MVT::f32 && SVT != MVT::f16) {
+ SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
+ if (ConstantFPSDNode::isValueValidForType(SVT, APF) &&
+ // Only do this if the target has a native EXTLOAD instruction from
+ // a smaller type.
+ TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) &&
+ TLI.ShouldShrinkFPConstant(OrigVT)) {
+ Type *SType = SVT.getTypeForEVT(*DAG.getContext());
+ LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+ VT = SVT;
+ Extend = true;
+ }
+ }
+ }
+
+ SDValue CPIdx =
+ DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout()));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ if (Extend) {
+ SDValue Result = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT,
+ Alignment);
+ return Result;
+ }
+ SDValue Result = DAG.getLoad(
+ OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment);
+ return Result;
+}
+
+/// Expands the Constant node to a load from the constant pool.
+SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) {
+ SDLoc dl(CP);
+ EVT VT = CP->getValueType(0);
+ SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(),
+ TLI.getPointerTy(DAG.getDataLayout()));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue Result = DAG.getLoad(
+ VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment);
+ return Result;
+}
+
+/// Some targets cannot handle a variable insertion index for the
+/// INSERT_VECTOR_ELT instruction. In this case, it
+/// is necessary to spill the vector being inserted into to memory, perform
+/// the insert there, and then read the result back.
+SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
+ SDValue Val,
+ SDValue Idx,
+ const SDLoc &dl) {
+ SDValue Tmp1 = Vec;
+ SDValue Tmp2 = Val;
+ SDValue Tmp3 = Idx;
+
+ // If the target doesn't support this, we have to spill the input vector
+ // to a temporary stack slot, update the element, then reload it. This is
+ // badness. We could also load the value into a vector register (either
+ // with a "move to register" or "extload into register" instruction), then
+ // permute it into place, if the idx is a constant and if the idx is
+ // supported by the target.
+ EVT VT = Tmp1.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VT);
+
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+
+ // Store the vector.
+ SDValue Ch = DAG.getStore(
+ DAG.getEntryNode(), dl, Tmp1, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
+
+ SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3);
+
+ // Store the scalar value.
+ Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT);
+ // Load the updated vector.
+ return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), SPFI));
+}
+
+SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+ SDValue Idx,
+ const SDLoc &dl) {
+ if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
+ // SCALAR_TO_VECTOR requires that the type of the value being inserted
+ // match the element type of the vector being created, except for
+ // integers in which case the inserted value can be over width.
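+ // (E.g. inserting an i32 value into a v16i8 vector is accepted here.)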
+ EVT EltVT = Vec.getValueType().getVectorElementType(); + if (Val.getValueType() == EltVT || + (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) { + SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, + Vec.getValueType(), Val); + + unsigned NumElts = Vec.getValueType().getVectorNumElements(); + // We generate a shuffle of InVec and ScVec, so the shuffle mask + // should be 0,1,2,3,4,5... with the appropriate element replaced with + // elt 0 of the RHS. + SmallVector<int, 8> ShufOps; + for (unsigned i = 0; i != NumElts; ++i) + ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts); + + return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, ShufOps); + } + } + return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl); +} + +SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { + LLVM_DEBUG(dbgs() << "Optimizing float store operations\n"); + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + // FIXME: We shouldn't do this for TargetConstantFP's. + // FIXME: move this to the DAG Combiner! Note that we can't regress due + // to phase ordering between legalized code and the dag combiner. This + // probably means that we need to integrate dag combiner and legalizer + // together. + // We generally can't do this one for long doubles. + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + unsigned Alignment = ST->getAlignment(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); + AAMDNodes AAInfo = ST->getAAInfo(); + SDLoc dl(ST); + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { + if (CFP->getValueType(0) == MVT::f32 && + TLI.isTypeLegal(MVT::i32)) { + SDValue Con = DAG.getConstant(CFP->getValueAPF(). + bitcastToAPInt().zextOrTrunc(32), + SDLoc(CFP), MVT::i32); + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), Alignment, + MMOFlags, AAInfo); + } + + if (CFP->getValueType(0) == MVT::f64) { + // If this target supports 64-bit registers, do a single 64-bit store. + if (TLI.isTypeLegal(MVT::i64)) { + SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + zextOrTrunc(64), SDLoc(CFP), MVT::i64); + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), + Alignment, MMOFlags, AAInfo); + } + + if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { + // Otherwise, if the target supports 32-bit registers, use 2 32-bit + // stores. If the target supports neither 32- nor 64-bits, this + // xform is certainly not worth it. 
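+ // E.g. storing f64 1.0 emits i32 stores of 0x00000000 and 0x3FF00000; the
+ // pair is swapped below for big-endian targets.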
+ const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt(); + SDValue Lo = DAG.getConstant(IntVal.trunc(32), dl, MVT::i32); + SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), dl, MVT::i32); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment, + MMOFlags, AAInfo); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(4, dl, Ptr.getValueType())); + Hi = DAG.getStore(Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(4), + MinAlign(Alignment, 4U), MMOFlags, AAInfo); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } + } + } + return SDValue(nullptr, 0); +} + +void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { + StoreSDNode *ST = cast<StoreSDNode>(Node); + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + SDLoc dl(Node); + + unsigned Alignment = ST->getAlignment(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); + AAMDNodes AAInfo = ST->getAAInfo(); + + if (!ST->isTruncatingStore()) { + LLVM_DEBUG(dbgs() << "Legalizing store operation\n"); + if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { + ReplaceNode(ST, OptStore); + return; + } + + SDValue Value = ST->getValue(); + MVT VT = Value.getSimpleValueType(); + switch (TLI.getOperationAction(ISD::STORE, VT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: { + // If this is an unaligned store and the target doesn't support it, + // expand it. + EVT MemVT = ST->getMemoryVT(); + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { + LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); + SDValue Result = TLI.expandUnalignedStore(ST, DAG); + ReplaceNode(SDValue(ST, 0), Result); + } else + LLVM_DEBUG(dbgs() << "Legal store\n"); + break; + } + case TargetLowering::Custom: { + LLVM_DEBUG(dbgs() << "Trying custom lowering\n"); + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res && Res != SDValue(Node, 0)) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Promote: { + MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT); + assert(NVT.getSizeInBits() == VT.getSizeInBits() && + "Can only promote stores to same size type"); + Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); + SDValue Result = + DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + Alignment, MMOFlags, AAInfo); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + } + return; + } + + LLVM_DEBUG(dbgs() << "Legalizing truncating store operations\n"); + SDValue Value = ST->getValue(); + EVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + auto &DL = DAG.getDataLayout(); + + if (StWidth != StVT.getStoreSizeInBits()) { + // Promote to a byte-sized store with upper bits zero if not + // storing an integral number of bytes. For example, promote + // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + StVT.getStoreSizeInBits()); + Value = DAG.getZeroExtendInReg(Value, dl, StVT); + SDValue Result = + DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, + Alignment, MMOFlags, AAInfo); + ReplaceNode(SDValue(Node, 0), Result); + } else if (StWidth & (StWidth - 1)) { + // If not storing a power-of-2 number of bits, expand as two stores. 
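+ // E.g. StWidth == 24 yields RoundWidth == 16 and ExtraWidth == 8 below.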
+ assert(!StVT.isVector() && "Unsupported truncstore!"); + unsigned LogStWidth = Log2_32(StWidth); + assert(LogStWidth < 32); + unsigned RoundWidth = 1 << LogStWidth; + assert(RoundWidth < StWidth); + unsigned ExtraWidth = StWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Store size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi; + unsigned IncrementSize; + + if (DL.isLittleEndian()) { + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) + // Store the bottom RoundWidth bits. + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + RoundVT, Alignment, MMOFlags, AAInfo); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); + Hi = DAG.getNode( + ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(RoundWidth, dl, + TLI.getShiftAmountTy(Value.getValueType(), DL))); + Hi = DAG.getTruncStore( + Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + } else { + // Big endian - avoid unaligned stores. + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X + // Store the top RoundWidth bits. + Hi = DAG.getNode( + ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(ExtraWidth, dl, + TLI.getShiftAmountTy(Value.getValueType(), DL))); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), + RoundVT, Alignment, MMOFlags, AAInfo); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); + Lo = DAG.getTruncStore( + Chain, dl, Value, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + } + + // The order of the stores doesn't matter. + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + ReplaceNode(SDValue(Node, 0), Result); + } else { + switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: { + EVT MemVT = ST->getMemoryVT(); + // If this is an unaligned store and the target doesn't support it, + // expand it. + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { + SDValue Result = TLI.expandUnalignedStore(ST, DAG); + ReplaceNode(SDValue(ST, 0), Result); + } + break; + } + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res && Res != SDValue(Node, 0)) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Expand: + assert(!StVT.isVector() && + "Vector Stores are handled in LegalizeVectorOps"); + + SDValue Result; + + // TRUNCSTORE:i16 i32 -> STORE i16 + if (TLI.isTypeLegal(StVT)) { + Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); + Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + Alignment, MMOFlags, AAInfo); + } else { + // The in-memory type isn't legal. Truncate to the type it would promote + // to, and then do a truncstore. 
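+        // E.g. a truncstore to i16 on a target where i16 is promoted to
+        // i32: first truncate the value to i32, then emit an i32 -> i16
+        // truncating store.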
+ Value = DAG.getNode(ISD::TRUNCATE, dl, + TLI.getTypeToTransformTo(*DAG.getContext(), StVT), + Value); + Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + StVT, Alignment, MMOFlags, AAInfo); + } + + ReplaceNode(SDValue(Node, 0), Result); + break; + } + } +} + +void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { + LoadSDNode *LD = cast<LoadSDNode>(Node); + SDValue Chain = LD->getChain(); // The chain. + SDValue Ptr = LD->getBasePtr(); // The base pointer. + SDValue Value; // The value returned by the load op. + SDLoc dl(Node); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) { + LLVM_DEBUG(dbgs() << "Legalizing non-extending load operation\n"); + MVT VT = Node->getSimpleValueType(0); + SDValue RVal = SDValue(Node, 0); + SDValue RChain = SDValue(Node, 1); + + switch (TLI.getOperationAction(Node->getOpcode(), VT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: { + EVT MemVT = LD->getMemoryVT(); + const DataLayout &DL = DAG.getDataLayout(); + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *LD->getMemOperand())) { + std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG); + } + break; + } + case TargetLowering::Custom: + if (SDValue Res = TLI.LowerOperation(RVal, DAG)) { + RVal = Res; + RChain = Res.getValue(1); + } + break; + + case TargetLowering::Promote: { + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + assert(NVT.getSizeInBits() == VT.getSizeInBits() && + "Can only promote loads to same size type"); + + SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getMemOperand()); + RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res); + RChain = Res.getValue(1); + break; + } + } + if (RChain.getNode() != Node) { + assert(RVal.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain); + if (UpdatedNodes) { + UpdatedNodes->insert(RVal.getNode()); + UpdatedNodes->insert(RChain.getNode()); + } + ReplacedNode(Node); + } + return; + } + + LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n"); + EVT SrcVT = LD->getMemoryVT(); + unsigned SrcWidth = SrcVT.getSizeInBits(); + unsigned Alignment = LD->getAlignment(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + if (SrcWidth != SrcVT.getStoreSizeInBits() && + // Some targets pretend to have an i1 loading operation, and actually + // load an i8. This trick is correct for ZEXTLOAD because the top 7 + // bits are guaranteed to be zero; it helps the optimizers understand + // that these bits are zero. It is also useful for EXTLOAD, since it + // tells the optimizers that those bits are undefined. It would be + // nice to have an effective generic way of getting these benefits... + // Until such a way is found, don't insist on promoting i1 here. + (SrcVT != MVT::i1 || + TLI.getLoadExtAction(ExtType, Node->getValueType(0), MVT::i1) == + TargetLowering::Promote)) { + // Promote to a byte-sized load if not loading an integral number of + // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. + unsigned NewWidth = SrcVT.getStoreSizeInBits(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); + SDValue Ch; + + // The extra bits are guaranteed to be zero, since we stored them that + // way. 
A zext load from NVT thus automatically gives zext from SrcVT. + + ISD::LoadExtType NewExtType = + ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; + + SDValue Result = + DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo(), NVT, Alignment, MMOFlags, AAInfo); + + Ch = Result.getValue(1); // The chain. + + if (ExtType == ISD::SEXTLOAD) + // Having the top bits zero doesn't help when sign extending. + Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) + // All the top bits are guaranteed to be zero - inform the optimizers. + Result = DAG.getNode(ISD::AssertZext, dl, + Result.getValueType(), Result, + DAG.getValueType(SrcVT)); + + Value = Result; + Chain = Ch; + } else if (SrcWidth & (SrcWidth - 1)) { + // If not loading a power-of-2 number of bits, expand as two loads. + assert(!SrcVT.isVector() && "Unsupported extload!"); + unsigned LogSrcWidth = Log2_32(SrcWidth); + assert(LogSrcWidth < 32); + unsigned RoundWidth = 1 << LogSrcWidth; + assert(RoundWidth < SrcWidth); + unsigned ExtraWidth = SrcWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Load size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi, Ch; + unsigned IncrementSize; + auto &DL = DAG.getDataLayout(); + + if (DL.isLittleEndian()) { + // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) + // Load the bottom RoundWidth bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, + AAInfo); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, + AAInfo); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode( + ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(RoundWidth, dl, + TLI.getShiftAmountTy(Hi.getValueType(), DL))); + + // Join the hi and lo parts. + Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } else { + // Big endian - avoid unaligned loads. + // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 + // Load the top RoundWidth bits. + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, + AAInfo); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, + AAInfo); + + // Build a factor node to remember that this load is independent of + // the other one. 
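+      // (Note: the TokenFactor below only merges the two load chains; it
+      // imposes no ordering between the loads themselves.)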
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode( + ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(ExtraWidth, dl, + TLI.getShiftAmountTy(Hi.getValueType(), DL))); + + // Join the hi and lo parts. + Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } + + Chain = Ch; + } else { + bool isCustom = false; + switch (TLI.getLoadExtAction(ExtType, Node->getValueType(0), + SrcVT.getSimpleVT())) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Custom: + isCustom = true; + LLVM_FALLTHROUGH; + case TargetLowering::Legal: + Value = SDValue(Node, 0); + Chain = SDValue(Node, 1); + + if (isCustom) { + if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { + Value = Res; + Chain = Res.getValue(1); + } + } else { + // If this is an unaligned load and the target doesn't support it, + // expand it. + EVT MemVT = LD->getMemoryVT(); + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, + *LD->getMemOperand())) { + std::tie(Value, Chain) = TLI.expandUnalignedLoad(LD, DAG); + } + } + break; + + case TargetLowering::Expand: { + EVT DestVT = Node->getValueType(0); + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) { + // If the source type is not legal, see if there is a legal extload to + // an intermediate type that we can then extend further. + EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT()); + if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT? + TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) { + // If we are loading a legal type, this is a non-extload followed by a + // full extend. + ISD::LoadExtType MidExtType = + (LoadVT == SrcVT) ? ISD::NON_EXTLOAD : ExtType; + + SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr, + SrcVT, LD->getMemOperand()); + unsigned ExtendOp = + ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType); + Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Chain = Load.getValue(1); + break; + } + + // Handle the special case of fp16 extloads. EXTLOAD doesn't have the + // normal undefined upper bits behavior to allow using an in-reg extend + // with the illegal FP type, so load as an integer and do the + // from-integer conversion. + if (SrcVT.getScalarType() == MVT::f16) { + EVT ISrcVT = SrcVT.changeTypeToInteger(); + EVT IDestVT = DestVT.changeTypeToInteger(); + EVT ILoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); + + SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, ILoadVT, Chain, + Ptr, ISrcVT, LD->getMemOperand()); + Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result); + Chain = Result.getValue(1); + break; + } + } + + assert(!SrcVT.isVector() && + "Vector Loads are handled in LegalizeVectorOps"); + + // FIXME: This does not work for vectors on most targets. Sign- + // and zero-extend operations are currently folded into extending + // loads, whether they are legal or not, and then we end up here + // without any support for legalizing them. + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an + // explicit zero/sign extend inreg. 
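+      // E.g. an unsupported SEXTLOAD of i8 into i32 becomes
+      //   Result = EXTLOAD:i8 Ptr           ;; upper 24 bits undefined
+      //   SIGN_EXTEND_INREG Result, i8      ;; replicate bit 7 upwards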
+ SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, + Node->getValueType(0), + Chain, Ptr, SrcVT, + LD->getMemOperand()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); + Value = ValRes; + Chain = Result.getValue(1); + break; + } + } + } + + // Since loads produce two values, make sure to remember that we legalized + // both of them. + if (Chain.getNode() != Node) { + assert(Value.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + if (UpdatedNodes) { + UpdatedNodes->insert(Value.getNode()); + UpdatedNodes->insert(Chain.getNode()); + } + ReplacedNode(Node); + } +} + +/// Return a legal replacement for the given operation, with all legal operands. +void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { + LLVM_DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); + + // Allow illegal target nodes and illegal registers. + if (Node->getOpcode() == ISD::TargetConstant || + Node->getOpcode() == ISD::Register) + return; + +#ifndef NDEBUG + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal && + "Unexpected illegal type!"); + + for (const SDValue &Op : Node->op_values()) + assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == + TargetLowering::TypeLegal || + Op.getOpcode() == ISD::TargetConstant || + Op.getOpcode() == ISD::Register) && + "Unexpected illegal type!"); +#endif + + // Figure out the correct action; the way to query this varies by opcode + TargetLowering::LegalizeAction Action = TargetLowering::Legal; + bool SimpleFinishLegalizing = true; + switch (Node->getOpcode()) { + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::STACKSAVE: + Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); + break; + case ISD::GET_DYNAMIC_AREA_OFFSET: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + break; + case ISD::VAARG: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + if (Action != TargetLowering::Promote) + Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); + break; + case ISD::FP_TO_FP16: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::EXTRACT_VECTOR_ELT: + case ISD::LROUND: + case ISD::LLROUND: + case ISD::LRINT: + case ISD::LLRINT: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(0).getValueType()); + break; + case ISD::SIGN_EXTEND_INREG: { + EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT(); + Action = TLI.getOperationAction(Node->getOpcode(), InnerType); + break; + } + case ISD::ATOMIC_STORE: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(2).getValueType()); + break; + case ISD::SELECT_CC: + case ISD::SETCC: + case ISD::BR_CC: { + unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : + Node->getOpcode() == ISD::SETCC ? 2 : 1; + unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 
+                                 2 : 0;
+    MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
+    ISD::CondCode CCCode =
+        cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
+    Action = TLI.getCondCodeAction(CCCode, OpVT);
+    if (Action == TargetLowering::Legal) {
+      if (Node->getOpcode() == ISD::SELECT_CC)
+        Action = TLI.getOperationAction(Node->getOpcode(),
+                                        Node->getValueType(0));
+      else
+        Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+    }
+    break;
+  }
+  case ISD::LOAD:
+  case ISD::STORE:
+    // FIXME: Model these properly.  LOAD and STORE are complicated, and
+    // STORE expects the unlegalized operand in some cases.
+    SimpleFinishLegalizing = false;
+    break;
+  case ISD::CALLSEQ_START:
+  case ISD::CALLSEQ_END:
+    // FIXME: This shouldn't be necessary.  These nodes have special properties
+    // dealing with the recursive nature of legalization.  Removing this
+    // special case should be done as part of making LegalizeDAG non-recursive.
+    SimpleFinishLegalizing = false;
+    break;
+  case ISD::EXTRACT_ELEMENT:
+  case ISD::FLT_ROUNDS_:
+  case ISD::MERGE_VALUES:
+  case ISD::EH_RETURN:
+  case ISD::FRAME_TO_ARGS_OFFSET:
+  case ISD::EH_DWARF_CFA:
+  case ISD::EH_SJLJ_SETJMP:
+  case ISD::EH_SJLJ_LONGJMP:
+  case ISD::EH_SJLJ_SETUP_DISPATCH:
+    // These operations lie about being legal: when they claim to be legal,
+    // they should actually be expanded.
+    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+    if (Action == TargetLowering::Legal)
+      Action = TargetLowering::Expand;
+    break;
+  case ISD::INIT_TRAMPOLINE:
+  case ISD::ADJUST_TRAMPOLINE:
+  case ISD::FRAMEADDR:
+  case ISD::RETURNADDR:
+  case ISD::ADDROFRETURNADDR:
+  case ISD::SPONENTRY:
+    // These operations lie about being legal: when they claim to be legal,
+    // they should actually be custom-lowered.
+    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+    if (Action == TargetLowering::Legal)
+      Action = TargetLowering::Custom;
+    break;
+  case ISD::READCYCLECOUNTER:
+    // READCYCLECOUNTER returns an i64, even if type legalization might have
+    // expanded that to several smaller types.
+    Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64);
+    break;
+  case ISD::READ_REGISTER:
+  case ISD::WRITE_REGISTER:
+    // Named registers are legal in the DAG, but blocked by register-name
+    // selection if not implemented by the target (to choose the correct
+    // register).  They'll be converted to Copy(To/From)Reg.
+    Action = TargetLowering::Legal;
+    break;
+  case ISD::DEBUGTRAP:
+    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+    if (Action == TargetLowering::Expand) {
+      // Replace ISD::DEBUGTRAP with ISD::TRAP.
+      SDValue NewVal;
+      NewVal = DAG.getNode(ISD::TRAP, SDLoc(Node), Node->getVTList(),
+                           Node->getOperand(0));
+      ReplaceNode(Node, NewVal.getNode());
+      LegalizeOp(NewVal.getNode());
+      return;
+    }
+    break;
+  case ISD::STRICT_LRINT:
+  case ISD::STRICT_LLRINT:
+  case ISD::STRICT_LROUND:
+  case ISD::STRICT_LLROUND:
+    // These pseudo-ops are the same as the other STRICT_ ops except
+    // they are registered with setOperationAction() using the input type
+    // instead of the output type.
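+    // E.g. for STRICT_LRINT with an f64 operand, the query below uses f64
+    // (operand 1; operand 0 is the chain), not the integer result type.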
+ Action = TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getOperand(1).getValueType()); + break; + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::SSUBSAT: + case ISD::USUBSAT: { + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + break; + } + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: { + unsigned Scale = Node->getConstantOperandVal(2); + Action = TLI.getFixedPointOperationAction(Node->getOpcode(), + Node->getValueType(0), Scale); + break; + } + case ISD::MSCATTER: + Action = TLI.getOperationAction(Node->getOpcode(), + cast<MaskedScatterSDNode>(Node)->getValue().getValueType()); + break; + case ISD::MSTORE: + Action = TLI.getOperationAction(Node->getOpcode(), + cast<MaskedStoreSDNode>(Node)->getValue().getValueType()); + break; + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + Action = TLI.getOperationAction( + Node->getOpcode(), Node->getOperand(0).getValueType()); + break; + default: + if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { + Action = TargetLowering::Legal; + } else { + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + } + break; + } + + if (SimpleFinishLegalizing) { + SDNode *NewNode = Node; + switch (Node->getOpcode()) { + default: break; + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: + case ISD::ROTL: + case ISD::ROTR: { + // Legalizing shifts/rotates requires adjusting the shift amount + // to the appropriate width. + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + if (!Op1.getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Op0.getValueType(), Op1); + // The getShiftAmountOperand() may create a new operand node or + // return the existing one. If new operand is created we need + // to update the parent node. + // Do not try to legalize SAO here! It will be automatically legalized + // in the next round. + if (SAO != Op1) + NewNode = DAG.UpdateNodeOperands(Node, Op0, SAO); + } + } + break; + case ISD::FSHL: + case ISD::FSHR: + case ISD::SRL_PARTS: + case ISD::SRA_PARTS: + case ISD::SHL_PARTS: { + // Legalizing shifts/rotates requires adjusting the shift amount + // to the appropriate width. + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue Op2 = Node->getOperand(2); + if (!Op2.getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Op0.getValueType(), Op2); + // The getShiftAmountOperand() may create a new operand node or + // return the existing one. If new operand is created we need + // to update the parent node. + if (SAO != Op2) + NewNode = DAG.UpdateNodeOperands(Node, Op0, Op1, SAO); + } + break; + } + } + + if (NewNode != Node) { + ReplaceNode(Node, NewNode); + Node = NewNode; + } + switch (Action) { + case TargetLowering::Legal: + LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n"); + return; + case TargetLowering::Custom: + LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); + // FIXME: The handling for custom lowering with multiple results is + // a complete mess. 
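+      // What follows handles the two common shapes: a single-result node is
+      // replaced directly, while a multi-result node is replaced result by
+      // result from the values of the custom-lowered node.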
+ if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { + if (!(Res.getNode() != Node || Res.getResNo() != 0)) + return; + + if (Node->getNumValues() == 1) { + LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); + // We can just directly replace this node with the lowered value. + ReplaceNode(SDValue(Node, 0), Res); + return; + } + + SmallVector<SDValue, 8> ResultVals; + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + ResultVals.push_back(Res.getValue(i)); + LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); + ReplaceNode(Node, ResultVals.data()); + return; + } + LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); + LLVM_FALLTHROUGH; + case TargetLowering::Expand: + if (ExpandNode(Node)) + return; + LLVM_FALLTHROUGH; + case TargetLowering::LibCall: + ConvertNodeToLibcall(Node); + return; + case TargetLowering::Promote: + PromoteNode(Node); + return; + } + } + + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "NODE: "; + Node->dump( &DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to legalize this operator!"); + + case ISD::CALLSEQ_START: + case ISD::CALLSEQ_END: + break; + case ISD::LOAD: + return LegalizeLoadOps(Node); + case ISD::STORE: + return LegalizeStoreOps(Node); + } +} + +SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { + SDValue Vec = Op.getOperand(0); + SDValue Idx = Op.getOperand(1); + SDLoc dl(Op); + + // Before we generate a new store to a temporary stack slot, see if there is + // already one that we can use. There often is because when we scalarize + // vector operations (using SelectionDAG::UnrollVectorOp for example) a whole + // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in + // the vector. If all are expanded here, we don't want one store per vector + // element. + + // Caches for hasPredecessorHelper + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + Visited.insert(Op.getNode()); + Worklist.push_back(Idx.getNode()); + SDValue StackPtr, Ch; + for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), + UE = Vec.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) { + if (ST->isIndexed() || ST->isTruncatingStore() || + ST->getValue() != Vec) + continue; + + // Make sure that nothing else could have stored into the destination of + // this store. + if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode())) + continue; + + // If the index is dependent on the store we will introduce a cycle when + // creating the load (the load uses the index, and by replacing the chain + // we will make the index dependent on the load). Also, the store might be + // dependent on the extractelement and introduce a cycle when creating + // the load. + if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) || + ST->hasPredecessor(Op.getNode())) + continue; + + StackPtr = ST->getBasePtr(); + Ch = SDValue(ST, 0); + break; + } + } + + EVT VecVT = Vec.getValueType(); + + if (!Ch.getNode()) { + // Store the value to a temporary stack slot, then LOAD the returned part. 
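+    // (No reusable store was found above, so spill the vector once here;
+    // later extracts of the same vector can then find and reuse this slot.)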
+    StackPtr = DAG.CreateStackTemporary(VecVT);
+    Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+                      MachinePointerInfo());
+  }
+
+  StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
+
+  SDValue NewLoad;
+
+  if (Op.getValueType().isVector())
+    NewLoad =
+        DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo());
+  else
+    NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+                             MachinePointerInfo(),
+                             VecVT.getVectorElementType());
+
+  // Replace the chain going out of the store by the one out of the load.
+  DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1));
+
+  // We introduced a cycle though, so update the load's operands, making sure
+  // to use the original store's chain as an incoming chain.
+  SmallVector<SDValue, 6> NewLoadOperands(NewLoad->op_begin(),
+                                          NewLoad->op_end());
+  NewLoadOperands[0] = Ch;
+  NewLoad =
+      SDValue(DAG.UpdateNodeOperands(NewLoad.getNode(), NewLoadOperands), 0);
+  return NewLoad;
+}
+
+SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
+  assert(Op.getValueType().isVector() && "Non-vector insert subvector!");
+
+  SDValue Vec = Op.getOperand(0);
+  SDValue Part = Op.getOperand(1);
+  SDValue Idx = Op.getOperand(2);
+  SDLoc dl(Op);
+
+  // Store the vector to a temporary stack slot, overwrite the inserted part,
+  // then load the updated vector back out.
+  EVT VecVT = Vec.getValueType();
+  SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+  int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  MachinePointerInfo PtrInfo =
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+
+  // First store the whole vector.
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
+
+  // Then compute the address of the inserted part.
+  SDValue SubStackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
+
+  // Store the subvector.
+  Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo());
+
+  // Finally, load the updated vector.
+  return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
+}
+
+SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+  // We can't handle this case efficiently.  Allocate a sufficiently
+  // aligned object on the stack, store each element into it, then load
+  // the result as a vector.
+  // Create the stack frame object.
+  EVT VT = Node->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+  SDLoc dl(Node);
+  SDValue FIPtr = DAG.CreateStackTemporary(VT);
+  int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+  MachinePointerInfo PtrInfo =
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+
+  // Emit a store of each element to the stack slot.
+  SmallVector<SDValue, 8> Stores;
+  unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+  assert(TypeByteSize > 0 && "Vector element type too small for stack store!");
+  // Store (in the right endianness) the elements to memory.
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+    // Ignore undef elements.
+    if (Node->getOperand(i).isUndef()) continue;
+
+    unsigned Offset = TypeByteSize * i;
+
+    SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType());
+    Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+
+    // If the destination vector element type is narrower than the source
+    // element type, only store the bits necessary.
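+    // E.g. building a v4i8 from operands that were promoted to i32: each
+    // element is stored with an i32 -> i8 truncating store, so only the
+    // low 8 bits land in the slot.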
+ if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) { + Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, + Node->getOperand(i), Idx, + PtrInfo.getWithOffset(Offset), EltVT)); + } else + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), + Idx, PtrInfo.getWithOffset(Offset))); + } + + SDValue StoreChain; + if (!Stores.empty()) // Not all undef elements? + StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + else + StoreChain = DAG.getEntryNode(); + + // Result is a load from the stack slot. + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo); +} + +/// Bitcast a floating-point value to an integer value. Only bitcast the part +/// containing the sign bit if the target has no integer value capable of +/// holding all bits of the floating-point value. +void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, + const SDLoc &DL, + SDValue Value) const { + EVT FloatVT = Value.getValueType(); + unsigned NumBits = FloatVT.getSizeInBits(); + State.FloatVT = FloatVT; + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); + // Convert to an integer of the same size. + if (TLI.isTypeLegal(IVT)) { + State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value); + State.SignMask = APInt::getSignMask(NumBits); + State.SignBit = NumBits - 1; + return; + } + + auto &DataLayout = DAG.getDataLayout(); + // Store the float to memory, then load the sign part out as an integer. + MVT LoadTy = TLI.getRegisterType(*DAG.getContext(), MVT::i8); + // First create a temporary that is aligned for both the load and store. + SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); + int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + // Then store the float to it. + State.FloatPtr = StackPtr; + MachineFunction &MF = DAG.getMachineFunction(); + State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI); + State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr, + State.FloatPointerInfo); + + SDValue IntPtr; + if (DataLayout.isBigEndian()) { + assert(FloatVT.isByteSized() && "Unsupported floating point type!"); + // Load out a legal integer with the same sign bit as the float. + IntPtr = StackPtr; + State.IntPointerInfo = State.FloatPointerInfo; + } else { + // Advance the pointer so that the loaded byte will contain the sign bit. + unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1; + IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr, + DAG.getConstant(ByteOffset, DL, StackPtr.getValueType())); + State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI, + ByteOffset); + } + + State.IntPtr = IntPtr; + State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, IntPtr, + State.IntPointerInfo, MVT::i8); + State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7); + State.SignBit = 7; +} + +/// Replace the integer value produced by getSignAsIntValue() with a new value +/// and cast the result back to a floating-point type. +SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State, + const SDLoc &DL, + SDValue NewIntValue) const { + if (!State.Chain) + return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue); + + // Override the part containing the sign bit in the value stored on the stack. 
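+  // Only the single byte holding the sign bit is rewritten (the i8
+  // truncstore below); the float is then reloaded in full.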
+ SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr, + State.IntPointerInfo, MVT::i8); + return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr, + State.FloatPointerInfo); +} + +SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { + SDLoc DL(Node); + SDValue Mag = Node->getOperand(0); + SDValue Sign = Node->getOperand(1); + + // Get sign bit into an integer value. + FloatSignAsInt SignAsInt; + getSignAsIntValue(SignAsInt, DL, Sign); + + EVT IntVT = SignAsInt.IntValue.getValueType(); + SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT); + SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue, + SignMask); + + // If FABS is legal transform FCOPYSIGN(x, y) => sign(x) ? -FABS(x) : FABS(X) + EVT FloatVT = Mag.getValueType(); + if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) { + SDValue AbsValue = DAG.getNode(ISD::FABS, DL, FloatVT, Mag); + SDValue NegValue = DAG.getNode(ISD::FNEG, DL, FloatVT, AbsValue); + SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(IntVT), SignBit, + DAG.getConstant(0, DL, IntVT), ISD::SETNE); + return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue); + } + + // Transform Mag value to integer, and clear the sign bit. + FloatSignAsInt MagAsInt; + getSignAsIntValue(MagAsInt, DL, Mag); + EVT MagVT = MagAsInt.IntValue.getValueType(); + SDValue ClearSignMask = DAG.getConstant(~MagAsInt.SignMask, DL, MagVT); + SDValue ClearedSign = DAG.getNode(ISD::AND, DL, MagVT, MagAsInt.IntValue, + ClearSignMask); + + // Get the signbit at the right position for MagAsInt. + int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit; + EVT ShiftVT = IntVT; + if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) { + SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit); + ShiftVT = MagVT; + } + if (ShiftAmount > 0) { + SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, ShiftVT); + SignBit = DAG.getNode(ISD::SRL, DL, ShiftVT, SignBit, ShiftCnst); + } else if (ShiftAmount < 0) { + SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, ShiftVT); + SignBit = DAG.getNode(ISD::SHL, DL, ShiftVT, SignBit, ShiftCnst); + } + if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) { + SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit); + } + + // Store the part with the modified sign and convert back to float. + SDValue CopiedSign = DAG.getNode(ISD::OR, DL, MagVT, ClearedSign, SignBit); + return modifySignAsInt(MagAsInt, DL, CopiedSign); +} + +SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const { + SDLoc DL(Node); + SDValue Value = Node->getOperand(0); + + // Transform FABS(x) => FCOPYSIGN(x, 0.0) if FCOPYSIGN is legal. + EVT FloatVT = Value.getValueType(); + if (TLI.isOperationLegalOrCustom(ISD::FCOPYSIGN, FloatVT)) { + SDValue Zero = DAG.getConstantFP(0.0, DL, FloatVT); + return DAG.getNode(ISD::FCOPYSIGN, DL, FloatVT, Value, Zero); + } + + // Transform value to integer, clear the sign bit and transform back. 
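+  // E.g. when f32 maps to a legal i32, this computes Bits & 0x7FFFFFFF:
+  // ~SignMask clears only bit 31, which is exactly fabs() for IEEE values,
+  // NaNs included.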
+ FloatSignAsInt ValueAsInt; + getSignAsIntValue(ValueAsInt, DL, Value); + EVT IntVT = ValueAsInt.IntValue.getValueType(); + SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT); + SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue, + ClearSignMask); + return modifySignAsInt(ValueAsInt, DL, ClearedSign); +} + +void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, + SmallVectorImpl<SDValue> &Results) { + unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); + assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" + " not tell us which reg is the stack pointer!"); + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + SDValue Tmp1 = SDValue(Node, 0); + SDValue Tmp2 = SDValue(Node, 1); + SDValue Tmp3 = Node->getOperand(2); + SDValue Chain = Tmp1.getOperand(0); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. + Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl); + + SDValue Size = Tmp2.getOperand(1); + SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); + Chain = SP.getValue(1); + unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); + unsigned StackAlign = + DAG.getSubtarget().getFrameLowering()->getStackAlignment(); + Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + if (Align > StackAlign) + Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, + DAG.getConstant(-(uint64_t)Align, dl, VT)); + Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain + + Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), + DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); + + Results.push_back(Tmp1); + Results.push_back(Tmp2); +} + +/// Legalize a SETCC with given LHS and RHS and condition code CC on the current +/// target. +/// +/// If the SETCC has been legalized using AND / OR, then the legalized node +/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert +/// will be set to false. +/// +/// If the SETCC has been legalized by using getSetCCSwappedOperands(), +/// then the values of LHS and RHS will be swapped, CC will be set to the +/// new condition, and NeedInvert will be set to false. +/// +/// If the SETCC has been legalized using the inverse condcode, then LHS and +/// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert +/// will be set to true. The caller must invert the result of the SETCC with +/// SelectionDAG::getLogicalNOT() or take equivalent action to swap the effect +/// of a true/false result. +/// +/// \returns true if the SetCC has been legalized, false if it hasn't. +bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, + SDValue &RHS, SDValue &CC, + bool &NeedInvert, + const SDLoc &dl) { + MVT OpVT = LHS.getSimpleValueType(); + ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); + NeedInvert = false; + switch (TLI.getCondCodeAction(CCCode, OpVT)) { + default: llvm_unreachable("Unknown condition code action!"); + case TargetLowering::Legal: + // Nothing to do. + break; + case TargetLowering::Expand: { + ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode); + if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { + std::swap(LHS, RHS); + CC = DAG.getCondCode(InvCC); + return true; + } + // Swapping operands didn't work. Try inverting the condition. 
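+    // E.g. if SETUGT is not legal but its inverse SETULE is, rewrite to
+    // SETULE and let NeedInvert tell the caller to negate the result.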
+    bool NeedSwap = false;
+    InvCC = ISD::getSetCCInverse(CCCode, OpVT.isInteger());
+    if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+      // If inverting the condition is not enough, try swapping operands
+      // on top of it.
+      InvCC = ISD::getSetCCSwappedOperands(InvCC);
+      NeedSwap = true;
+    }
+    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+      CC = DAG.getCondCode(InvCC);
+      NeedInvert = true;
+      if (NeedSwap)
+        std::swap(LHS, RHS);
+      return true;
+    }
+
+    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+    unsigned Opc = 0;
+    switch (CCCode) {
+    default: llvm_unreachable("Don't know how to expand this condition!");
+    case ISD::SETO:
+      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT)
+             && "If SETO is expanded, SETOEQ must be legal!");
+      CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
+    case ISD::SETUO:
+      assert(TLI.isCondCodeLegal(ISD::SETUNE, OpVT)
+             && "If SETUO is expanded, SETUNE must be legal!");
+      CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break;
+    case ISD::SETOEQ:
+    case ISD::SETOGT:
+    case ISD::SETOGE:
+    case ISD::SETOLT:
+    case ISD::SETOLE:
+    case ISD::SETONE:
+    case ISD::SETUEQ:
+    case ISD::SETUNE:
+    case ISD::SETUGT:
+    case ISD::SETUGE:
+    case ISD::SETULT:
+    case ISD::SETULE:
+      // If we are floating point, assign and break, otherwise fall through.
+      if (!OpVT.isInteger()) {
+        // We can use the 4th bit to tell if we are the unordered
+        // or ordered version of the opcode.
+        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+        break;
+      }
+      // Fall through if we are an unsigned integer.
+      LLVM_FALLTHROUGH;
+    case ISD::SETLE:
+    case ISD::SETGT:
+    case ISD::SETGE:
+    case ISD::SETLT:
+    case ISD::SETNE:
+    case ISD::SETEQ:
+      // If all combinations of inverting the condition and swapping operands
+      // didn't work, then we have no means to expand the condition.
+      llvm_unreachable("Don't know how to expand this condition!");
+    }
+
+    SDValue SetCC1, SetCC2;
+    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+      // If we aren't the ordered or unordered operation,
+      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
+      SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+      SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+    } else {
+      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS).
+      SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1);
+      SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2);
+    }
+    LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+    RHS = SDValue();
+    CC = SDValue();
+    return true;
+  }
+  }
+  return false;
+}
+
+/// Emit a store/load combination to the stack.  This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed.  It then
+/// does a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
+                                               EVT DestVT, const SDLoc &dl) {
+  return EmitStackConvert(SrcOp, SlotVT, DestVT, dl, DAG.getEntryNode());
+}
+
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
+                                               EVT DestVT, const SDLoc &dl,
+                                               SDValue Chain) {
+  // Create the stack frame object.
+  unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment(
+      SrcOp.getValueType().getTypeForEVT(*DAG.getContext()));
+  SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+  FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+  int SPFI = StackPtrFI->getIndex();
+  MachinePointerInfo PtrInfo =
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+
+  unsigned SrcSize = SrcOp.getValueSizeInBits();
+  unsigned SlotSize = SlotVT.getSizeInBits();
+  unsigned DestSize = DestVT.getSizeInBits();
+  Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+  unsigned DestAlign = DAG.getDataLayout().getPrefTypeAlignment(DestType);
+
+  // Emit a store to the stack slot.  Use a truncstore if the input value is
+  // larger than SlotVT.
+  SDValue Store;
+
+  if (SrcSize > SlotSize)
+    Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo,
+                              SlotVT, SrcAlign);
+  else {
+    assert(SrcSize == SlotSize && "Invalid store");
+    Store =
+        DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
+  }
+
+  // Result is a load from the stack slot.
+  if (SlotSize == DestSize)
+    return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign);
+
+  assert(SlotSize < DestSize && "Unknown extension!");
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT,
+                        DestAlign);
+}
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+  SDLoc dl(Node);
+  // Create a vector-sized/aligned stack slot, store the value to element #0,
+  // then load the whole vector back out.
+  SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+  FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+  int SPFI = StackPtrFI->getIndex();
+
+  SDValue Ch = DAG.getTruncStore(
+      DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr,
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI),
+      Node->getValueType(0).getVectorElementType());
+  return DAG.getLoad(
+      Node->getValueType(0), dl, Ch, StackPtr,
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
+}
+
+static bool
+ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
+                     const TargetLowering &TLI, SDValue &Res) {
+  unsigned NumElems = Node->getNumOperands();
+  SDLoc dl(Node);
+  EVT VT = Node->getValueType(0);
+
+  // Try to group the scalars into pairs, shuffle the pairs together, then
+  // shuffle the pairs of pairs together, etc. until the vector has
+  // been built.  This will work only if all of the necessary shuffle masks
+  // are legal.
+
+  // We do this in two phases; first to check the legality of the shuffles,
+  // and next, assuming that all shuffles are legal, to create the new nodes.
+  for (int Phase = 0; Phase < 2; ++Phase) {
+    SmallVector<std::pair<SDValue, SmallVector<int, 16>>, 16> IntermedVals,
+                                                              NewIntermedVals;
+    for (unsigned i = 0; i < NumElems; ++i) {
+      SDValue V = Node->getOperand(i);
+      if (V.isUndef())
+        continue;
+
+      SDValue Vec;
+      if (Phase)
+        Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, V);
+      IntermedVals.push_back(std::make_pair(Vec, SmallVector<int, 16>(1, i)));
+    }
+
+    while (IntermedVals.size() > 2) {
+      NewIntermedVals.clear();
+      for (unsigned i = 0, e = (IntermedVals.size() & ~1u); i < e; i += 2) {
+        // This vector and the next vector are shuffled together (simply to
+        // append the one to the other).
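+        // E.g. with four defined scalars a,b,c,d the result is built as
+        //   v1 = shuffle(scalar_to_vector(a), scalar_to_vector(b))
+        //   v2 = shuffle(scalar_to_vector(c), scalar_to_vector(d))
+        //   result = shuffle(v1, v2)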
+ SmallVector<int, 16> ShuffleVec(NumElems, -1); + + SmallVector<int, 16> FinalIndices; + FinalIndices.reserve(IntermedVals[i].second.size() + + IntermedVals[i+1].second.size()); + + int k = 0; + for (unsigned j = 0, f = IntermedVals[i].second.size(); j != f; + ++j, ++k) { + ShuffleVec[k] = j; + FinalIndices.push_back(IntermedVals[i].second[j]); + } + for (unsigned j = 0, f = IntermedVals[i+1].second.size(); j != f; + ++j, ++k) { + ShuffleVec[k] = NumElems + j; + FinalIndices.push_back(IntermedVals[i+1].second[j]); + } + + SDValue Shuffle; + if (Phase) + Shuffle = DAG.getVectorShuffle(VT, dl, IntermedVals[i].first, + IntermedVals[i+1].first, + ShuffleVec); + else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) + return false; + NewIntermedVals.push_back( + std::make_pair(Shuffle, std::move(FinalIndices))); + } + + // If we had an odd number of defined values, then append the last + // element to the array of new vectors. + if ((IntermedVals.size() & 1) != 0) + NewIntermedVals.push_back(IntermedVals.back()); + + IntermedVals.swap(NewIntermedVals); + } + + assert(IntermedVals.size() <= 2 && IntermedVals.size() > 0 && + "Invalid number of intermediate vectors"); + SDValue Vec1 = IntermedVals[0].first; + SDValue Vec2; + if (IntermedVals.size() > 1) + Vec2 = IntermedVals[1].first; + else if (Phase) + Vec2 = DAG.getUNDEF(VT); + + SmallVector<int, 16> ShuffleVec(NumElems, -1); + for (unsigned i = 0, e = IntermedVals[0].second.size(); i != e; ++i) + ShuffleVec[IntermedVals[0].second[i]] = i; + for (unsigned i = 0, e = IntermedVals[1].second.size(); i != e; ++i) + ShuffleVec[IntermedVals[1].second[i]] = NumElems + i; + + if (Phase) + Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec); + else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) + return false; + } + + return true; +} + +/// Expand a BUILD_VECTOR node on targets that don't +/// support the operation, but do support the resultant vector type. +SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { + unsigned NumElems = Node->getNumOperands(); + SDValue Value1, Value2; + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + EVT OpVT = Node->getOperand(0).getValueType(); + EVT EltVT = VT.getVectorElementType(); + + // If the only non-undef value is the low element, turn this into a + // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X. + bool isOnlyLowElement = true; + bool MoreThanTwoValues = false; + bool isConstant = true; + for (unsigned i = 0; i < NumElems; ++i) { + SDValue V = Node->getOperand(i); + if (V.isUndef()) + continue; + if (i > 0) + isOnlyLowElement = false; + if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) + isConstant = false; + + if (!Value1.getNode()) { + Value1 = V; + } else if (!Value2.getNode()) { + if (V != Value1) + Value2 = V; + } else if (V != Value1 && V != Value2) { + MoreThanTwoValues = true; + } + } + + if (!Value1.getNode()) + return DAG.getUNDEF(VT); + + if (isOnlyLowElement) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0)); + + // If all elements are constants, create a load from the constant pool. 
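+  // E.g. BUILD_VECTOR <4 x i32> <1, 2, 3, 4> becomes a single load of a
+  // 16-byte constant-pool entry, typically cheaper than four inserts.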
+ if (isConstant) { + SmallVector<Constant*, 16> CV; + for (unsigned i = 0, e = NumElems; i != e; ++i) { + if (ConstantFPSDNode *V = + dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) { + CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue())); + } else if (ConstantSDNode *V = + dyn_cast<ConstantSDNode>(Node->getOperand(i))) { + if (OpVT==EltVT) + CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); + else { + // If OpVT and EltVT don't match, EltVT is not legal and the + // element values have been promoted/truncated earlier. Undo this; + // we don't want a v16i8 to become a v16i32 for example. + const ConstantInt *CI = V->getConstantIntValue(); + CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()), + CI->getZExtValue())); + } + } else { + assert(Node->getOperand(i).isUndef()); + Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext()); + CV.push_back(UndefValue::get(OpNTy)); + } + } + Constant *CP = ConstantVector::get(CV); + SDValue CPIdx = + DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout())); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + return DAG.getLoad( + VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + Alignment); + } + + SmallSet<SDValue, 16> DefinedValues; + for (unsigned i = 0; i < NumElems; ++i) { + if (Node->getOperand(i).isUndef()) + continue; + DefinedValues.insert(Node->getOperand(i)); + } + + if (TLI.shouldExpandBuildVectorWithShuffles(VT, DefinedValues.size())) { + if (!MoreThanTwoValues) { + SmallVector<int, 8> ShuffleVec(NumElems, -1); + for (unsigned i = 0; i < NumElems; ++i) { + SDValue V = Node->getOperand(i); + if (V.isUndef()) + continue; + ShuffleVec[i] = V == Value1 ? 0 : NumElems; + } + if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) { + // Get the splatted value into the low element of a vector register. + SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1); + SDValue Vec2; + if (Value2.getNode()) + Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2); + else + Vec2 = DAG.getUNDEF(VT); + + // Return shuffle(LowValVec, undef, <0,0,0,0>) + return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec); + } + } else { + SDValue Res; + if (ExpandBVWithShuffles(Node, DAG, TLI, Res)) + return Res; + } + } + + // Otherwise, we can't handle this case efficiently. + return ExpandVectorBuildThroughStack(Node); +} + +SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + SDValue SplatVal = Node->getOperand(0); + + return DAG.getSplatBuildVector(VT, DL, SplatVal); +} + +// Expand a node into a call to a libcall. If the result value +// does not fit into a register, return the lo part and set the hi part to the +// by-reg argument. If it does fit into a single register, return the result +// and leave the Hi part unset. 
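+// E.g. an i128 SDIV on a target without native 128-bit division is
+// typically expanded to a call to the runtime routine __divti3 (via RTLIB).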
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, + bool isSigned) { + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (const SDValue &Op : Node->op_values()) { + EVT ArgVT = Op.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op; + Entry.Ty = ArgTy; + Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned); + Entry.IsZExt = !TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned); + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy(DAG.getDataLayout())); + + EVT RetVT = Node->getValueType(0); + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + + // By default, the input chain to this libcall is the entry node of the + // function. If the libcall is going to be emitted as a tail call then + // TLI.isUsedByReturnOnly will change it to the right chain if the return + // node which is being folded has a non-entry input chain. + SDValue InChain = DAG.getEntryNode(); + + // isTailCall may be true since the callee does not reference caller stack + // frame. Check if it's in the right position and that the return types match. + SDValue TCChain = InChain; + const Function &F = DAG.getMachineFunction().getFunction(); + bool isTailCall = + TLI.isInTailCallPosition(DAG, Node, TCChain) && + (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy()); + if (isTailCall) + InChain = TCChain; + + TargetLowering::CallLoweringInfo CLI(DAG); + bool signExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, isSigned); + CLI.setDebugLoc(SDLoc(Node)) + .setChain(InChain) + .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, + std::move(Args)) + .setTailCall(isTailCall) + .setSExtResult(signExtend) + .setZExtResult(!signExtend) + .setIsPostTypeLegalization(true); + + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + if (!CallInfo.second.getNode()) { + LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump(&DAG)); + // It's a tailcall, return the chain (which is the DAG root). + return DAG.getRoot(); + } + + LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump(&DAG)); + return CallInfo.first; +} + +// Expand a node into a call to a libcall. Similar to +// ExpandLibCall except that the first operand is the in-chain. 
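+// Threading the node's incoming chain into the call keeps the libcall
+// ordered after prior side-effecting operations.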
+std::pair<SDValue, SDValue> +SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, + bool isSigned) { + SDValue InChain = Node->getOperand(0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); + Entry.Ty = ArgTy; + Entry.IsSExt = isSigned; + Entry.IsZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy(DAG.getDataLayout())); + + Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(SDLoc(Node)) + .setChain(InChain) + .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, + std::move(Args)) + .setSExtResult(isSigned) + .setZExtResult(!isSigned); + + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + return CallInfo; +} + +SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128) { + if (Node->isStrictFPOpcode()) + Node = DAG.mutateStrictFPToFP(Node); + + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = Call_F32; break; + case MVT::f64: LC = Call_F64; break; + case MVT::f80: LC = Call_F80; break; + case MVT::f128: LC = Call_F128; break; + case MVT::ppcf128: LC = Call_PPCF128; break; + } + return ExpandLibCall(LC, Node, false); +} + +SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, + RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, + RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, + RTLIB::Libcall Call_I128) { + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::i8: LC = Call_I8; break; + case MVT::i16: LC = Call_I16; break; + case MVT::i32: LC = Call_I32; break; + case MVT::i64: LC = Call_I64; break; + case MVT::i128: LC = Call_I128; break; + } + return ExpandLibCall(LC, Node, isSigned); +} + +/// Expand the node to a libcall based on first argument type (for instance +/// lround and its variant). +SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128) { + if (Node->isStrictFPOpcode()) + Node = DAG.mutateStrictFPToFP(Node); + + RTLIB::Libcall LC; + switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = Call_F32; break; + case MVT::f64: LC = Call_F64; break; + case MVT::f80: LC = Call_F80; break; + case MVT::f128: LC = Call_F128; break; + case MVT::ppcf128: LC = Call_PPCF128; break; + } + + return ExpandLibCall(LC, Node, false); +} + +/// Issue libcalls to __{u}divmod to compute div / rem pairs. +void +SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + unsigned Opcode = Node->getOpcode(); + bool isSigned = Opcode == ISD::SDIVREM; + + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? 
RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; + } + + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDValue InChain = DAG.getEntryNode(); + + EVT RetVT = Node->getValueType(0); + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (const SDValue &Op : Node->op_values()) { + EVT ArgVT = Op.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op; + Entry.Ty = ArgTy; + Entry.IsSExt = isSigned; + Entry.IsZExt = !isSigned; + Args.push_back(Entry); + } + + // Also pass the return address of the remainder. + SDValue FIPtr = DAG.CreateStackTemporary(RetVT); + Entry.Node = FIPtr; + Entry.Ty = RetTy->getPointerTo(); + Entry.IsSExt = isSigned; + Entry.IsZExt = !isSigned; + Args.push_back(Entry); + + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy(DAG.getDataLayout())); + + SDLoc dl(Node); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(InChain) + .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, + std::move(Args)) + .setSExtResult(isSigned) + .setZExtResult(!isSigned); + + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + // Remainder is loaded back from the stack frame. + SDValue Rem = + DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, MachinePointerInfo()); + Results.push_back(CallInfo.first); + Results.push_back(Rem); +} + +/// Return true if sincos libcall is available. +static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = RTLIB::SINCOS_F32; break; + case MVT::f64: LC = RTLIB::SINCOS_F64; break; + case MVT::f80: LC = RTLIB::SINCOS_F80; break; + case MVT::f128: LC = RTLIB::SINCOS_F128; break; + case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; + } + return TLI.getLibcallName(LC) != nullptr; +} + +/// Only issue sincos libcall if both sin and cos are needed. +static bool useSinCos(SDNode *Node) { + unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN + ? ISD::FCOS : ISD::FSIN; + + SDValue Op0 = Node->getOperand(0); + for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node) + continue; + // The other user might have been turned into sincos already. + if (User->getOpcode() == OtherOpcode || User->getOpcode() == ISD::FSINCOS) + return true; + } + return false; +} + +/// Issue libcalls to sincos to compute sin / cos pairs. 
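+/// The call is issued against the common GNU-style prototype, with both
+/// result slots materialized as stack temporaries and loaded back afterwards
+/// (an assumed prototype, shown for illustration only):
+///
+///   void sincos(double X, double *Sin, double *Cos);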
+void
+SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
+                                          SmallVectorImpl<SDValue> &Results) {
+  RTLIB::Libcall LC;
+  switch (Node->getSimpleValueType(0).SimpleTy) {
+  default: llvm_unreachable("Unexpected request for libcall!");
+  case MVT::f32: LC = RTLIB::SINCOS_F32; break;
+  case MVT::f64: LC = RTLIB::SINCOS_F64; break;
+  case MVT::f80: LC = RTLIB::SINCOS_F80; break;
+  case MVT::f128: LC = RTLIB::SINCOS_F128; break;
+  case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
+  }
+
+  // The input chain to this libcall is the entry node of the function.
+  // Legalizing the call will automatically add the previous call to the
+  // dependence.
+  SDValue InChain = DAG.getEntryNode();
+
+  EVT RetVT = Node->getValueType(0);
+  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+
+  // Pass the argument.
+  Entry.Node = Node->getOperand(0);
+  Entry.Ty = RetTy;
+  Entry.IsSExt = false;
+  Entry.IsZExt = false;
+  Args.push_back(Entry);
+
+  // Pass the return address of sin.
+  SDValue SinPtr = DAG.CreateStackTemporary(RetVT);
+  Entry.Node = SinPtr;
+  Entry.Ty = RetTy->getPointerTo();
+  Entry.IsSExt = false;
+  Entry.IsZExt = false;
+  Args.push_back(Entry);
+
+  // Also pass the return address of the cos.
+  SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
+  Entry.Node = CosPtr;
+  Entry.Ty = RetTy->getPointerTo();
+  Entry.IsSExt = false;
+  Entry.IsZExt = false;
+  Args.push_back(Entry);
+
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy(DAG.getDataLayout()));
+
+  SDLoc dl(Node);
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(dl).setChain(InChain).setLibCallee(
+      TLI.getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), Callee,
+      std::move(Args));
+
+  std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+  Results.push_back(
+      DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, MachinePointerInfo()));
+  Results.push_back(
+      DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, MachinePointerInfo()));
+}
+
+/// This function is responsible for legalizing an
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it. At this point, we know that the result and operand types are
+/// legal for the target.
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
+                                                   EVT DestVT,
+                                                   const SDLoc &dl) {
+  EVT SrcVT = Op0.getValueType();
+
+  // TODO: Should any fast-math-flags be set for the created nodes?
+  LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
+  if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
+    LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double "
+                         "expansion\n");
+
+    // Get the stack frame index of an 8-byte buffer.
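+    // The two words are filled in so that, reinterpreted as an f64, the
+    // buffer holds 2^52 + x (0x43300000 is the high word of 2^52); the bias
+    // subtraction below then recovers x exactly. A minimal standalone sketch
+    // of the same trick in plain C++, for the unsigned case (illustrative
+    // only, not part of this file):
+    //
+    //   #include <cstdint>
+    //   #include <cstring>
+    //   double U32ToF64(uint32_t X) {
+    //     uint64_t Bits = (0x43300000ULL << 32) | X; // f64 bits of 2^52 + X
+    //     double D;
+    //     std::memcpy(&D, &Bits, sizeof D);
+    //     return D - 4503599627370496.0;             // subtract the 2^52 bias
+    //   }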
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); + + // word offset constant for Hi/Lo address computation + SDValue WordOff = DAG.getConstant(sizeof(int), dl, + StackSlot.getValueType()); + // set up Hi and Lo (into buffer) address based on endian + SDValue Hi = StackSlot; + SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(), + StackSlot, WordOff); + if (DAG.getDataLayout().isLittleEndian()) + std::swap(Hi, Lo); + + // if signed map to unsigned space + SDValue Op0Mapped; + if (isSigned) { + // constant used to invert sign bit (signed to unsigned mapping) + SDValue SignBit = DAG.getConstant(0x80000000u, dl, MVT::i32); + Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit); + } else { + Op0Mapped = Op0; + } + // store the lo of the constructed double - based on integer input + SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op0Mapped, Lo, + MachinePointerInfo()); + // initial hi portion of constructed double + SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32); + // store the hi of the constructed double - biased exponent + SDValue Store2 = + DAG.getStore(Store1, dl, InitialHi, Hi, MachinePointerInfo()); + // load the constructed double + SDValue Load = + DAG.getLoad(MVT::f64, dl, Store2, StackSlot, MachinePointerInfo()); + // FP constant to bias correct the final result + SDValue Bias = DAG.getConstantFP(isSigned ? + BitsToDouble(0x4330000080000000ULL) : + BitsToDouble(0x4330000000000000ULL), + dl, MVT::f64); + // subtract the bias + SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); + // final result + SDValue Result = DAG.getFPExtendOrRound(Sub, dl, DestVT); + return Result; + } + assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); + // Code below here assumes !isSigned without checking again. + + SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); + + SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0, + DAG.getConstant(0, dl, SrcVT), ISD::SETLT); + SDValue Zero = DAG.getIntPtrConstant(0, dl), + Four = DAG.getIntPtrConstant(4, dl); + SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(), + SignSet, Four, Zero); + + // If the sign bit of the integer is set, the large number will be treated + // as a negative number. To counteract this, the dynamic code adds an + // offset depending on the data type. 
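+  // Worked example: for i32 Op0 = 0xFFFFFFFF (4294967295 when unsigned),
+  // SINT_TO_FP above yields -1.0, and adding the 2^32 fudge factor from the
+  // table below gives the desired 4294967295.0.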
+ uint64_t FF; + switch (SrcVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported integer type!"); + case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) + case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) + case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) + case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float) + } + if (DAG.getDataLayout().isLittleEndian()) + FF <<= 32; + Constant *FudgeFactor = ConstantInt::get( + Type::getInt64Ty(*DAG.getContext()), FF); + + SDValue CPIdx = + DAG.getConstantPool(FudgeFactor, TLI.getPointerTy(DAG.getDataLayout())); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset); + Alignment = std::min(Alignment, 4u); + SDValue FudgeInReg; + if (DestVT == MVT::f32) + FudgeInReg = DAG.getLoad( + MVT::f32, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + Alignment); + else { + SDValue Load = DAG.getExtLoad( + ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, + Alignment); + HandleSDNode Handle(Load); + LegalizeOp(Load.getNode()); + FudgeInReg = Handle.getValue(); + } + + return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); +} + +/// This function is responsible for legalizing a +/// *INT_TO_FP operation of the specified operand when the target requests that +/// we promote it. At this point, we know that the result and operand types are +/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP +/// operation that takes a larger input. +SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, + bool isSigned, + const SDLoc &dl) { + // First step, figure out the appropriate *INT_TO_FP operation to use. + EVT NewInTy = LegalOp.getValueType(); + + unsigned OpToUse = 0; + + // Scan for the appropriate larger type to use. + while (true) { + NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1); + assert(NewInTy.isInteger() && "Ran out of possibilities!"); + + // If the target supports SINT_TO_FP of this type, use it. + if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) { + OpToUse = ISD::SINT_TO_FP; + break; + } + if (isSigned) continue; + + // If the target supports UINT_TO_FP of this type, use it. + if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) { + OpToUse = ISD::UINT_TO_FP; + break; + } + + // Otherwise, try a larger type. + } + + // Okay, we found the operation and type to use. Zero extend our input to the + // desired type then run the operation on it. + return DAG.getNode(OpToUse, dl, DestVT, + DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + dl, NewInTy, LegalOp)); +} + +/// This function is responsible for legalizing a +/// FP_TO_*INT operation of the specified operand when the target requests that +/// we promote it. At this point, we know that the result and operand types are +/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT +/// operation that returns a larger result. +SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, + bool isSigned, + const SDLoc &dl) { + // First step, figure out the appropriate FP_TO*INT operation to use. + EVT NewOutTy = DestVT; + + unsigned OpToUse = 0; + + // Scan for the appropriate larger type to use. 
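+  // For example, a target whose only legal conversion is FP_TO_SINT at i32
+  // handles an f32 -> i16 FP_TO_UINT by emitting FP_TO_SINT f32 -> i32 and
+  // truncating: every unsigned i16 value fits in a signed i32.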
+ while (true) { + NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1); + assert(NewOutTy.isInteger() && "Ran out of possibilities!"); + + // A larger signed type can hold all unsigned values of the requested type, + // so using FP_TO_SINT is valid + if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { + OpToUse = ISD::FP_TO_SINT; + break; + } + + // However, if the value may be < 0.0, we *must* use some FP_TO_SINT. + if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { + OpToUse = ISD::FP_TO_UINT; + break; + } + + // Otherwise, try a larger type. + } + + // Okay, we found the operation and type to use. + SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp); + + // Truncate the result of the extended FP_TO_*INT operation to the desired + // size. + return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); +} + +/// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts. +SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { + EVT VT = Op.getValueType(); + EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + unsigned Sz = VT.getScalarSizeInBits(); + + SDValue Tmp, Tmp2, Tmp3; + + // If we can, perform BSWAP first and then the mask+swap the i4, then i2 + // and finally the i1 pairs. + // TODO: We can easily support i4/i2 legal types if any target ever does. + if (Sz >= 8 && isPowerOf2_32(Sz)) { + // Create the masks - repeating the pattern every byte. + APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0)); + APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC)); + APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA)); + APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F)); + APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33)); + APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55)); + + // BSWAP if the type is wider than a single byte. + Tmp = (Sz > 8 ? 
DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op); + + // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4) + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + + // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2) + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + + // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1) + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + return Tmp; + } + + Tmp = DAG.getConstant(0, dl, VT); + for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) { + if (I < J) + Tmp2 = + DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT)); + else + Tmp2 = + DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT)); + + APInt Shift(Sz, 1); + Shift <<= J; + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2); + } + + return Tmp; +} + +/// Open code the operations for BSWAP of the specified operation. +SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) { + EVT VT = Op.getValueType(); + EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; + switch (VT.getSimpleVT().getScalarType().SimpleTy) { + default: llvm_unreachable("Unhandled Expand type in BSWAP!"); + case MVT::i16: + // Use a rotate by 8. This can be further expanded if necessary. 
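+    // e.g. bswap16(0xABCD) == 0xCDAB, which is exactly a rotate-left by 8.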
+ return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + case MVT::i32: + Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, + DAG.getConstant(0xFF0000, dl, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT)); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); + Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); + return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); + case MVT::i64: + Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); + Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); + Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); + Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, + DAG.getConstant(255ULL<<48, dl, VT)); + Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, + DAG.getConstant(255ULL<<40, dl, VT)); + Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, + DAG.getConstant(255ULL<<32, dl, VT)); + Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, + DAG.getConstant(255ULL<<24, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, + DAG.getConstant(255ULL<<16, dl, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, + DAG.getConstant(255ULL<<8 , dl, VT)); + Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7); + Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); + Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); + Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); + return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4); + } +} + +bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Trying to expand node\n"); + SmallVector<SDValue, 8> Results; + SDLoc dl(Node); + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + bool NeedInvert; + switch (Node->getOpcode()) { + case ISD::ABS: + if (TLI.expandABS(Node, Tmp1, DAG)) + Results.push_back(Tmp1); + break; + case ISD::CTPOP: + if (TLI.expandCTPOP(Node, Tmp1, DAG)) + Results.push_back(Tmp1); + break; + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + if (TLI.expandCTLZ(Node, Tmp1, DAG)) + Results.push_back(Tmp1); + break; + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + if (TLI.expandCTTZ(Node, Tmp1, DAG)) + Results.push_back(Tmp1); + break; + case ISD::BITREVERSE: + Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl)); + break; + case ISD::BSWAP: + Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); + break; + case ISD::FRAMEADDR: + case ISD::RETURNADDR: + case ISD::FRAME_TO_ARGS_OFFSET: + Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0))); + break; + case ISD::EH_DWARF_CFA: { + SDValue CfaArg = DAG.getSExtOrTrunc(Node->getOperand(0), dl, + TLI.getPointerTy(DAG.getDataLayout())); + SDValue Offset = DAG.getNode(ISD::ADD, dl, + CfaArg.getValueType(), + DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl, + CfaArg.getValueType()), + CfaArg); + SDValue FA = DAG.getNode( + ISD::FRAMEADDR, dl, 
TLI.getPointerTy(DAG.getDataLayout()), + DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()))); + Results.push_back(DAG.getNode(ISD::ADD, dl, FA.getValueType(), + FA, Offset)); + break; + } + case ISD::FLT_ROUNDS_: + Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0))); + break; + case ISD::EH_RETURN: + case ISD::EH_LABEL: + case ISD::PREFETCH: + case ISD::VAEND: + case ISD::EH_SJLJ_LONGJMP: + // If the target didn't expand these, there's nothing to do, so just + // preserve the chain and be done. + Results.push_back(Node->getOperand(0)); + break; + case ISD::READCYCLECOUNTER: + // If the target didn't expand this, just return 'zero' and preserve the + // chain. + Results.append(Node->getNumValues() - 1, + DAG.getConstant(0, dl, Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); + break; + case ISD::EH_SJLJ_SETJMP: + // If the target didn't expand this, just return 'zero' and preserve the + // chain. + Results.push_back(DAG.getConstant(0, dl, MVT::i32)); + Results.push_back(Node->getOperand(0)); + break; + case ISD::ATOMIC_LOAD: { + // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. + SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0)); + SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); + SDValue Swap = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, + Node->getOperand(0), Node->getOperand(1), Zero, Zero, + cast<AtomicSDNode>(Node)->getMemOperand()); + Results.push_back(Swap.getValue(0)); + Results.push_back(Swap.getValue(1)); + break; + } + case ISD::ATOMIC_STORE: { + // There is no libcall for atomic store; fake it with ATOMIC_SWAP. + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, + cast<AtomicSDNode>(Node)->getMemoryVT(), + Node->getOperand(0), + Node->getOperand(1), Node->getOperand(2), + cast<AtomicSDNode>(Node)->getMemOperand()); + Results.push_back(Swap.getValue(1)); + break; + } + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { + // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and + // splits out the success value as a comparison. Expanding the resulting + // ATOMIC_CMP_SWAP will produce a libcall. 
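+    // The success flag is recomputed as (loaded value == expected value),
+    // with both sides widened per the target's getExtendForAtomicOps() so
+    // the SETEQ below compares like-extended bits.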
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, + Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), + Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand()); + + SDValue ExtRes = Res; + SDValue LHS = Res; + SDValue RHS = Node->getOperand(1); + + EVT AtomicType = cast<AtomicSDNode>(Node)->getMemoryVT(); + EVT OuterType = Node->getValueType(0); + switch (TLI.getExtendForAtomicOps()) { + case ISD::SIGN_EXTEND: + LHS = DAG.getNode(ISD::AssertSext, dl, OuterType, Res, + DAG.getValueType(AtomicType)); + RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OuterType, + Node->getOperand(2), DAG.getValueType(AtomicType)); + ExtRes = LHS; + break; + case ISD::ZERO_EXTEND: + LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res, + DAG.getValueType(AtomicType)); + RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType); + ExtRes = LHS; + break; + case ISD::ANY_EXTEND: + LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType); + RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType); + break; + default: + llvm_unreachable("Invalid atomic op extension"); + } + + SDValue Success = + DAG.getSetCC(dl, Node->getValueType(1), LHS, RHS, ISD::SETEQ); + + Results.push_back(ExtRes.getValue(0)); + Results.push_back(Success); + Results.push_back(Res.getValue(1)); + break; + } + case ISD::DYNAMIC_STACKALLOC: + ExpandDYNAMIC_STACKALLOC(Node, Results); + break; + case ISD::MERGE_VALUES: + for (unsigned i = 0; i < Node->getNumValues(); i++) + Results.push_back(Node->getOperand(i)); + break; + case ISD::UNDEF: { + EVT VT = Node->getValueType(0); + if (VT.isInteger()) + Results.push_back(DAG.getConstant(0, dl, VT)); + else { + assert(VT.isFloatingPoint() && "Unknown value type!"); + Results.push_back(DAG.getConstantFP(0, dl, VT)); + } + break; + } + case ISD::STRICT_FP_ROUND: + // This expansion does not honor the "strict" properties anyway, + // so prefer falling back to the non-strict operation if legal. + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + break; + Tmp1 = EmitStackConvert(Node->getOperand(1), + Node->getValueType(0), + Node->getValueType(0), dl, Node->getOperand(0)); + ReplaceNode(Node, Tmp1.getNode()); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_ROUND node\n"); + return true; + case ISD::FP_ROUND: + case ISD::BITCAST: + Tmp1 = EmitStackConvert(Node->getOperand(0), + Node->getValueType(0), + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::STRICT_FP_EXTEND: + // This expansion does not honor the "strict" properties anyway, + // so prefer falling back to the non-strict operation if legal. 
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + break; + Tmp1 = EmitStackConvert(Node->getOperand(1), + Node->getOperand(1).getValueType(), + Node->getValueType(0), dl, Node->getOperand(0)); + ReplaceNode(Node, Tmp1.getNode()); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_EXTEND node\n"); + return true; + case ISD::FP_EXTEND: + Tmp1 = EmitStackConvert(Node->getOperand(0), + Node->getOperand(0).getValueType(), + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::SIGN_EXTEND_INREG: { + EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + EVT VT = Node->getValueType(0); + + // An in-register sign-extend of a boolean is a negation: + // 'true' (1) sign-extended is -1. + // 'false' (0) sign-extended is 0. + // However, we must mask the high bits of the source operand because the + // SIGN_EXTEND_INREG does not guarantee that the high bits are already zero. + + // TODO: Do this for vectors too? + if (ExtraVT.getSizeInBits() == 1) { + SDValue One = DAG.getConstant(1, dl, VT); + SDValue And = DAG.getNode(ISD::AND, dl, VT, Node->getOperand(0), One); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, Zero, And); + Results.push_back(Neg); + break; + } + + // NOTE: we could fall back on load/store here too for targets without + // SRA. However, it is doubtful that any exist. + EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + unsigned BitsDiff = VT.getScalarSizeInBits() - + ExtraVT.getScalarSizeInBits(); + SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy); + Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), + Node->getOperand(0), ShiftCst); + Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst); + Results.push_back(Tmp1); + break; + } + case ISD::UINT_TO_FP: + if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) { + Results.push_back(Tmp1); + break; + } + LLVM_FALLTHROUGH; + case ISD::SINT_TO_FP: + Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP, + Node->getOperand(0), Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::FP_TO_SINT: + if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) + Results.push_back(Tmp1); + break; + case ISD::STRICT_FP_TO_SINT: + if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) { + ReplaceNode(Node, Tmp1.getNode()); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_SINT node\n"); + return true; + } + break; + case ISD::FP_TO_UINT: + if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) + Results.push_back(Tmp1); + break; + case ISD::STRICT_FP_TO_UINT: + if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) { + // Relink the chain. + DAG.ReplaceAllUsesOfValueWith(SDValue(Node,1), Tmp2); + // Replace the new UINT result. + ReplaceNodeWithValue(SDValue(Node, 0), Tmp1); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_UINT node\n"); + return true; + } + break; + case ISD::VAARG: + Results.push_back(DAG.expandVAArg(Node)); + Results.push_back(Results[0].getValue(1)); + break; + case ISD::VACOPY: + Results.push_back(DAG.expandVACopy(Node)); + break; + case ISD::EXTRACT_VECTOR_ELT: + if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) + // This must be an access of the only element. Return it. 
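+      // (e.g. extracting element 0 of a v1i64 is just a bitcast to i64.)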
+      Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),
+                         Node->getOperand(0));
+    else
+      Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
+    Results.push_back(Tmp1);
+    break;
+  case ISD::EXTRACT_SUBVECTOR:
+    Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
+    break;
+  case ISD::INSERT_SUBVECTOR:
+    Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
+    break;
+  case ISD::CONCAT_VECTORS:
+    Results.push_back(ExpandVectorBuildThroughStack(Node));
+    break;
+  case ISD::SCALAR_TO_VECTOR:
+    Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
+    break;
+  case ISD::INSERT_VECTOR_ELT:
+    Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0),
+                                              Node->getOperand(1),
+                                              Node->getOperand(2), dl));
+    break;
+  case ISD::VECTOR_SHUFFLE: {
+    SmallVector<int, 32> NewMask;
+    ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
+
+    EVT VT = Node->getValueType(0);
+    EVT EltVT = VT.getVectorElementType();
+    SDValue Op0 = Node->getOperand(0);
+    SDValue Op1 = Node->getOperand(1);
+    if (!TLI.isTypeLegal(EltVT)) {
+      EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+
+      // BUILD_VECTOR operands are allowed to be wider than the element type.
+      // But if NewEltVT is smaller than EltVT the BUILD_VECTOR does not accept
+      // it.
+      if (NewEltVT.bitsLT(EltVT)) {
+        // Convert shuffle node.
+        // If original node was v4i64 and the new EltVT is i32,
+        // cast operands to v8i32 and re-build the mask.
+
+        // Calculate new VT, the size of the new VT should be equal to the
+        // original one.
+        EVT NewVT =
+            EVT::getVectorVT(*DAG.getContext(), NewEltVT,
+                             VT.getSizeInBits() / NewEltVT.getSizeInBits());
+        assert(NewVT.bitsEq(VT));
+
+        // cast operands to new VT
+        Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0);
+        Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1);
+
+        // Convert the shuffle mask
+        unsigned int factor =
+            NewVT.getVectorNumElements()/VT.getVectorNumElements();
+
+        // EltVT gets smaller
+        assert(factor > 0);
+
+        for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
+          if (Mask[i] < 0) {
+            for (unsigned fi = 0; fi < factor; ++fi)
+              NewMask.push_back(Mask[i]);
+          }
+          else {
+            for (unsigned fi = 0; fi < factor; ++fi)
+              NewMask.push_back(Mask[i]*factor+fi);
+          }
+        }
+        Mask = NewMask;
+        VT = NewVT;
+      }
+      EltVT = NewEltVT;
+    }
+    unsigned NumElems = VT.getVectorNumElements();
+    SmallVector<SDValue, 16> Ops;
+    for (unsigned i = 0; i != NumElems; ++i) {
+      if (Mask[i] < 0) {
+        Ops.push_back(DAG.getUNDEF(EltVT));
+        continue;
+      }
+      unsigned Idx = Mask[i];
+      if (Idx < NumElems)
+        Ops.push_back(DAG.getNode(
+            ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
+            DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+      else
+        Ops.push_back(DAG.getNode(
+            ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1,
+            DAG.getConstant(Idx - NumElems, dl,
+                            TLI.getVectorIdxTy(DAG.getDataLayout()))));
+    }
+
+    Tmp1 = DAG.getBuildVector(VT, dl, Ops);
+    // We may have changed the BUILD_VECTOR type. Cast it back to the Node type.
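+    // Worked example of the mask widening above: a v4i64 mask <1,-1,3,0>
+    // with NewEltVT == i32 (factor 2) becomes the v8i32 mask
+    // <2,3,-1,-1,6,7,0,1>.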
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1); + Results.push_back(Tmp1); + break; + } + case ISD::EXTRACT_ELEMENT: { + EVT OpTy = Node->getOperand(0).getValueType(); + if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { + // 1 -> Hi + Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), + DAG.getConstant(OpTy.getSizeInBits() / 2, dl, + TLI.getShiftAmountTy( + Node->getOperand(0).getValueType(), + DAG.getDataLayout()))); + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); + } else { + // 0 -> Lo + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), + Node->getOperand(0)); + } + Results.push_back(Tmp1); + break; + } + case ISD::STACKSAVE: + // Expand to CopyFromReg if the target set + // StackPointerRegisterToSaveRestore. + if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP, + Node->getValueType(0))); + Results.push_back(Results[0].getValue(1)); + } else { + Results.push_back(DAG.getUNDEF(Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); + } + break; + case ISD::STACKRESTORE: + // Expand to CopyToReg if the target set + // StackPointerRegisterToSaveRestore. + if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP, + Node->getOperand(1))); + } else { + Results.push_back(Node->getOperand(0)); + } + break; + case ISD::GET_DYNAMIC_AREA_OFFSET: + Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0))); + Results.push_back(Results[0].getValue(0)); + break; + case ISD::FCOPYSIGN: + Results.push_back(ExpandFCOPYSIGN(Node)); + break; + case ISD::FNEG: + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0)); + // TODO: If FNEG has fast-math-flags, propagate them to the FSUB. + Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, + Node->getOperand(0)); + Results.push_back(Tmp1); + break; + case ISD::FABS: + Results.push_back(ExpandFABS(Node)); + break; + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: { + // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B + ISD::CondCode Pred; + switch (Node->getOpcode()) { + default: llvm_unreachable("How did we get here?"); + case ISD::SMAX: Pred = ISD::SETGT; break; + case ISD::SMIN: Pred = ISD::SETLT; break; + case ISD::UMAX: Pred = ISD::SETUGT; break; + case ISD::UMIN: Pred = ISD::SETULT; break; + } + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp1, Tmp2, Pred); + Results.push_back(Tmp1); + break; + } + case ISD::FMINNUM: + case ISD::FMAXNUM: { + if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) + Results.push_back(Expanded); + break; + } + case ISD::FSIN: + case ISD::FCOS: { + EVT VT = Node->getValueType(0); + // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / + // fcos which share the same operand and both are used. 
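+    // e.g. when both sin(x) and cos(x) of the same x are live, one FSINCOS
+    // computes both results; FCOS then takes result value #1 below.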
+ if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || + isSinCosLibcallAvailable(Node, TLI)) + && useSinCos(Node)) { + SDVTList VTs = DAG.getVTList(VT, VT); + Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); + if (Node->getOpcode() == ISD::FCOS) + Tmp1 = Tmp1.getValue(1); + Results.push_back(Tmp1); + } + break; + } + case ISD::FMAD: + llvm_unreachable("Illegal fmad should never be formed"); + + case ISD::FP16_TO_FP: + if (Node->getValueType(0) != MVT::f32) { + // We can extend to types bigger than f32 in two steps without changing + // the result. Since "f16 -> f32" is much more commonly available, give + // CodeGen the option of emitting that before resorting to a libcall. + SDValue Res = + DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0)); + Results.push_back( + DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); + } + break; + case ISD::FP_TO_FP16: + LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n"); + if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { + SDValue Op = Node->getOperand(0); + MVT SVT = Op.getSimpleValueType(); + if ((SVT == MVT::f64 || SVT == MVT::f80) && + TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) { + // Under fastmath, we can expand this node into a fround followed by + // a float-half conversion. + SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, + DAG.getIntPtrConstant(0, dl)); + Results.push_back( + DAG.getNode(ISD::FP_TO_FP16, dl, Node->getValueType(0), FloatVal)); + } + } + break; + case ISD::ConstantFP: { + ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node); + // Check to see if this FP immediate is already legal. + // If this is a legal constant, turn it into a TargetConstantFP node. + if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0), + DAG.getMachineFunction().getFunction().hasOptSize())) + Results.push_back(ExpandConstantFP(CFP, true)); + break; + } + case ISD::Constant: { + ConstantSDNode *CP = cast<ConstantSDNode>(Node); + Results.push_back(ExpandConstant(CP)); + break; + } + case ISD::FSUB: { + EVT VT = Node->getValueType(0); + if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) { + const SDNodeFlags Flags = Node->getFlags(); + Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags); + Results.push_back(Tmp1); + } + break; + } + case ISD::SUB: { + EVT VT = Node->getValueType(0); + assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) && + TLI.isOperationLegalOrCustom(ISD::XOR, VT) && + "Don't know how to expand this subtraction!"); + Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1), + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, + VT)); + Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT)); + Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1)); + break; + } + case ISD::UREM: + case ISD::SREM: { + EVT VT = Node->getValueType(0); + bool isSigned = Node->getOpcode() == ISD::SREM; + unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; + unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; + Tmp2 = Node->getOperand(0); + Tmp3 = Node->getOperand(1); + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { + SDVTList VTs = DAG.getVTList(VT, VT); + Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); + Results.push_back(Tmp1); + } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { + // X % Y -> X-X/Y*Y + Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); + Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); + Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); + Results.push_back(Tmp1); + } + break; + } + case ISD::UDIV: + case ISD::SDIV: { + bool isSigned = Node->getOpcode() == ISD::SDIV; + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; + EVT VT = Node->getValueType(0); + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { + SDVTList VTs = DAG.getVTList(VT, VT); + Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), + Node->getOperand(1)); + Results.push_back(Tmp1); + } + break; + } + case ISD::MULHU: + case ISD::MULHS: { + unsigned ExpandOpcode = + Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : ISD::SMUL_LOHI; + EVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + + Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0), + Node->getOperand(1)); + Results.push_back(Tmp1.getValue(1)); + break; + } + case ISD::UMUL_LOHI: + case ISD::SMUL_LOHI: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + MVT VT = LHS.getSimpleValueType(); + unsigned MULHOpcode = + Node->getOpcode() == ISD::UMUL_LOHI ? ISD::MULHU : ISD::MULHS; + + if (TLI.isOperationLegalOrCustom(MULHOpcode, VT)) { + Results.push_back(DAG.getNode(ISD::MUL, dl, VT, LHS, RHS)); + Results.push_back(DAG.getNode(MULHOpcode, dl, VT, LHS, RHS)); + break; + } + + SmallVector<SDValue, 4> Halves; + EVT HalfType = EVT(VT).getHalfSizedIntegerVT(*DAG.getContext()); + assert(TLI.isTypeLegal(HalfType)); + if (TLI.expandMUL_LOHI(Node->getOpcode(), VT, Node, LHS, RHS, Halves, + HalfType, DAG, + TargetLowering::MulExpansionKind::Always)) { + for (unsigned i = 0; i < 2; ++i) { + SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Halves[2 * i]); + SDValue Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Halves[2 * i + 1]); + SDValue Shift = DAG.getConstant( + HalfType.getScalarSizeInBits(), dl, + TLI.getShiftAmountTy(HalfType, DAG.getDataLayout())); + Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); + Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); + } + break; + } + break; + } + case ISD::MUL: { + EVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + // See if multiply or divide can be lowered using two-result operations. + // We just need the low half of the multiply; try both the signed + // and unsigned forms. If the target supports both SMUL_LOHI and + // UMUL_LOHI, form a preference by checking which forms of plain + // MULH it supports. 
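+    // e.g. a target with UMUL_LOHI but no MULHU lowers an illegal MUL to
+    // UMUL_LOHI and keeps only result value #0 (the low half).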
+    bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT);
+    bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT);
+    bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT);
+    bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT);
+    unsigned OpToUse = 0;
+    if (HasSMUL_LOHI && !HasMULHS) {
+      OpToUse = ISD::SMUL_LOHI;
+    } else if (HasUMUL_LOHI && !HasMULHU) {
+      OpToUse = ISD::UMUL_LOHI;
+    } else if (HasSMUL_LOHI) {
+      OpToUse = ISD::SMUL_LOHI;
+    } else if (HasUMUL_LOHI) {
+      OpToUse = ISD::UMUL_LOHI;
+    }
+    if (OpToUse) {
+      Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0),
+                                    Node->getOperand(1)));
+      break;
+    }
+
+    SDValue Lo, Hi;
+    EVT HalfType = VT.getHalfSizedIntegerVT(*DAG.getContext());
+    if (TLI.isOperationLegalOrCustom(ISD::ZERO_EXTEND, VT) &&
+        TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) &&
+        TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
+        TLI.isOperationLegalOrCustom(ISD::OR, VT) &&
+        TLI.expandMUL(Node, Lo, Hi, HalfType, DAG,
+                      TargetLowering::MulExpansionKind::OnlyLegalOrCustom)) {
+      Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
+      Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi);
+      SDValue Shift =
+          DAG.getConstant(HalfType.getSizeInBits(), dl,
+                          TLI.getShiftAmountTy(HalfType, DAG.getDataLayout()));
+      Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
+      Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi));
+    }
+    break;
+  }
+  case ISD::FSHL:
+  case ISD::FSHR:
+    if (TLI.expandFunnelShift(Node, Tmp1, DAG))
+      Results.push_back(Tmp1);
+    break;
+  case ISD::ROTL:
+  case ISD::ROTR:
+    if (TLI.expandROT(Node, Tmp1, DAG))
+      Results.push_back(Tmp1);
+    break;
+  case ISD::SADDSAT:
+  case ISD::UADDSAT:
+  case ISD::SSUBSAT:
+  case ISD::USUBSAT:
+    Results.push_back(TLI.expandAddSubSat(Node, DAG));
+    break;
+  case ISD::SMULFIX:
+  case ISD::SMULFIXSAT:
+  case ISD::UMULFIX:
+  case ISD::UMULFIXSAT:
+    Results.push_back(TLI.expandFixedPointMul(Node, DAG));
+    break;
+  case ISD::ADDCARRY:
+  case ISD::SUBCARRY: {
+    SDValue LHS = Node->getOperand(0);
+    SDValue RHS = Node->getOperand(1);
+    SDValue Carry = Node->getOperand(2);
+
+    bool IsAdd = Node->getOpcode() == ISD::ADDCARRY;
+
+    // Initial add of the 2 operands.
+    unsigned Op = IsAdd ? ISD::ADD : ISD::SUB;
+    EVT VT = LHS.getValueType();
+    SDValue Sum = DAG.getNode(Op, dl, VT, LHS, RHS);
+
+    // Initial check for overflow.
+    EVT CarryType = Node->getValueType(1);
+    EVT SetCCType = getSetCCResultType(Node->getValueType(0));
+    ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
+    SDValue Overflow = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
+
+    // Add of the sum and the carry.
+    SDValue CarryExt =
+        DAG.getZeroExtendInReg(DAG.getZExtOrTrunc(Carry, dl, VT), dl, MVT::i1);
+    SDValue Sum2 = DAG.getNode(Op, dl, VT, Sum, CarryExt);
+
+    // Second check for overflow. If we are adding, we can only overflow if the
+    // initial sum is all 1s and the carry is set, resulting in a new sum of 0.
+    // If we are subtracting, we can only overflow if the initial sum is 0 and
+    // the carry is set, resulting in a new sum of all 1s.
+    SDValue Zero = DAG.getConstant(0, dl, VT);
+    SDValue Overflow2 =
+        IsAdd ?
DAG.getSetCC(dl, SetCCType, Sum2, Zero, ISD::SETEQ) + : DAG.getSetCC(dl, SetCCType, Sum, Zero, ISD::SETEQ); + Overflow2 = DAG.getNode(ISD::AND, dl, SetCCType, Overflow2, + DAG.getZExtOrTrunc(Carry, dl, SetCCType)); + + SDValue ResultCarry = + DAG.getNode(ISD::OR, dl, SetCCType, Overflow, Overflow2); + + Results.push_back(Sum2); + Results.push_back(DAG.getBoolExtOrTrunc(ResultCarry, dl, CarryType, VT)); + break; + } + case ISD::SADDO: + case ISD::SSUBO: { + SDValue Result, Overflow; + TLI.expandSADDSUBO(Node, Result, Overflow, DAG); + Results.push_back(Result); + Results.push_back(Overflow); + break; + } + case ISD::UADDO: + case ISD::USUBO: { + SDValue Result, Overflow; + TLI.expandUADDSUBO(Node, Result, Overflow, DAG); + Results.push_back(Result); + Results.push_back(Overflow); + break; + } + case ISD::UMULO: + case ISD::SMULO: { + SDValue Result, Overflow; + if (TLI.expandMULO(Node, Result, Overflow, DAG)) { + Results.push_back(Result); + Results.push_back(Overflow); + } + break; + } + case ISD::BUILD_PAIR: { + EVT PairTy = Node->getValueType(0); + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); + Tmp2 = DAG.getNode( + ISD::SHL, dl, PairTy, Tmp2, + DAG.getConstant(PairTy.getSizeInBits() / 2, dl, + TLI.getShiftAmountTy(PairTy, DAG.getDataLayout()))); + Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); + break; + } + case ISD::SELECT: + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + if (Tmp1.getOpcode() == ISD::SETCC) { + Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1), + Tmp2, Tmp3, + cast<CondCodeSDNode>(Tmp1.getOperand(2))->get()); + } else { + Tmp1 = DAG.getSelectCC(dl, Tmp1, + DAG.getConstant(0, dl, Tmp1.getValueType()), + Tmp2, Tmp3, ISD::SETNE); + } + Tmp1->setFlags(Node->getFlags()); + Results.push_back(Tmp1); + break; + case ISD::BR_JT: { + SDValue Chain = Node->getOperand(0); + SDValue Table = Node->getOperand(1); + SDValue Index = Node->getOperand(2); + + const DataLayout &TD = DAG.getDataLayout(); + EVT PTy = TLI.getPointerTy(TD); + + unsigned EntrySize = + DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); + + // For power-of-two jumptable entry sizes convert multiplication to a shift. + // This transformation needs to be done here since otherwise the MIPS + // backend will end up emitting a three instruction multiply sequence + // instead of a single shift and MSP430 will call a runtime function. + if (llvm::isPowerOf2_32(EntrySize)) + Index = DAG.getNode( + ISD::SHL, dl, Index.getValueType(), Index, + DAG.getConstant(llvm::Log2_32(EntrySize), dl, Index.getValueType())); + else + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, + DAG.getConstant(EntrySize, dl, Index.getValueType())); + SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(), + Index, Table); + + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); + SDValue LD = DAG.getExtLoad( + ISD::SEXTLOAD, dl, PTy, Chain, Addr, + MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT); + Addr = LD; + if (TLI.isJumpTableRelative()) { + // For PIC, the sequence is: + // BRIND(load(Jumptable + index) + RelocBase) + // RelocBase can be JumpTable, GOT or some sort of global base. 
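+      // e.g. with 4-byte entries the index was shifted left by 2 above, the
+      // entry is sign-extend-loaded, and RelocBase is added before BRIND.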
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, + TLI.getPICJumpTableRelocBase(Table, DAG)); + } + + Tmp1 = TLI.expandIndirectJTBranch(dl, LD.getValue(1), Addr, DAG); + Results.push_back(Tmp1); + break; + } + case ISD::BRCOND: + // Expand brcond's setcc into its constituent parts and create a BR_CC + // Node. + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + if (Tmp2.getOpcode() == ISD::SETCC) { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, + Tmp1, Tmp2.getOperand(2), + Tmp2.getOperand(0), Tmp2.getOperand(1), + Node->getOperand(2)); + } else { + // We test only the i1 bit. Skip the AND if UNDEF or another AND. + if (Tmp2.isUndef() || + (Tmp2.getOpcode() == ISD::AND && + isa<ConstantSDNode>(Tmp2.getOperand(1)) && + cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1)) + Tmp3 = Tmp2; + else + Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, + DAG.getConstant(1, dl, Tmp2.getValueType())); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, + DAG.getCondCode(ISD::SETNE), Tmp3, + DAG.getConstant(0, dl, Tmp3.getValueType()), + Node->getOperand(2)); + } + Results.push_back(Tmp1); + break; + case ISD::SETCC: { + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, + Tmp3, NeedInvert, dl); + + if (Legalized) { + // If we expanded the SETCC by swapping LHS and RHS, or by inverting the + // condition code, create a new SETCC node. + if (Tmp3.getNode()) + Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), + Tmp1, Tmp2, Tmp3, Node->getFlags()); + + // If we expanded the SETCC by inverting the condition code, then wrap + // the existing SETCC in a NOT to restore the intended condition. + if (NeedInvert) + Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0)); + + Results.push_back(Tmp1); + break; + } + + // Otherwise, SETCC for the given comparison type must be completely + // illegal; expand it into a SELECT_CC. + EVT VT = Node->getValueType(0); + int TrueValue; + switch (TLI.getBooleanContents(Tmp1.getValueType())) { + case TargetLowering::ZeroOrOneBooleanContent: + case TargetLowering::UndefinedBooleanContent: + TrueValue = 1; + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + TrueValue = -1; + break; + } + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, + DAG.getConstant(TrueValue, dl, VT), + DAG.getConstant(0, dl, VT), + Tmp3); + Tmp1->setFlags(Node->getFlags()); + Results.push_back(Tmp1); + break; + } + case ISD::SELECT_CC: { + Tmp1 = Node->getOperand(0); // LHS + Tmp2 = Node->getOperand(1); // RHS + Tmp3 = Node->getOperand(2); // True + Tmp4 = Node->getOperand(3); // False + EVT VT = Node->getValueType(0); + SDValue CC = Node->getOperand(4); + ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get(); + + if (TLI.isCondCodeLegalOrCustom(CCOp, Tmp1.getSimpleValueType())) { + // If the condition code is legal, then we need to expand this + // node using SETCC and SELECT. + EVT CmpVT = Tmp1.getValueType(); + assert(!TLI.isOperationExpand(ISD::SELECT, VT) && + "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be " + "expanded."); + EVT CCVT = getSetCCResultType(CmpVT); + SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC, Node->getFlags()); + Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4)); + break; + } + + // SELECT_CC is legal, so the condition code must not be. + bool Legalized = false; + // Try to legalize by inverting the condition. 
This is for targets that
+    // might support an ordered version of a condition, but not the unordered
+    // version (or vice versa).
+    ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp,
+                                               Tmp1.getValueType().isInteger());
+    if (TLI.isCondCodeLegalOrCustom(InvCC, Tmp1.getSimpleValueType())) {
+      // Use the new condition code and swap true and false
+      Legalized = true;
+      Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC);
+      Tmp1->setFlags(Node->getFlags());
+    } else {
+      // If the inverse is not legal, then try to swap the arguments using
+      // the inverse condition code.
+      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC);
+      if (TLI.isCondCodeLegalOrCustom(SwapInvCC, Tmp1.getSimpleValueType())) {
+        // The swapped inverse condition is legal, so swap true and false,
+        // lhs and rhs.
+        Legalized = true;
+        Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC);
+        Tmp1->setFlags(Node->getFlags());
+      }
+    }
+
+    if (!Legalized) {
+      Legalized = LegalizeSetCCCondCode(
+          getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert,
+          dl);
+
+      assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
+
+      // If we expanded the SETCC by inverting the condition code, then swap
+      // the True/False operands to match.
+      if (NeedInvert)
+        std::swap(Tmp3, Tmp4);
+
+      // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+      // condition code, create a new SELECT_CC node.
+      if (CC.getNode()) {
+        Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
+                           Tmp1, Tmp2, Tmp3, Tmp4, CC);
+      } else {
+        Tmp2 = DAG.getConstant(0, dl, Tmp1.getValueType());
+        CC = DAG.getCondCode(ISD::SETNE);
+        Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1,
+                           Tmp2, Tmp3, Tmp4, CC);
+      }
+      Tmp1->setFlags(Node->getFlags());
+    }
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::BR_CC: {
+    Tmp1 = Node->getOperand(0);              // Chain
+    Tmp2 = Node->getOperand(2);              // LHS
+    Tmp3 = Node->getOperand(3);              // RHS
+    Tmp4 = Node->getOperand(1);              // CC
+
+    bool Legalized = LegalizeSetCCCondCode(getSetCCResultType(
+        Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl);
+    (void)Legalized;
+    assert(Legalized && "Can't legalize BR_CC with legal condition!");
+
+    assert(!NeedInvert && "Don't know how to invert BR_CC!");
+
+    // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC
+    // node.
+    if (Tmp4.getNode()) {
+      Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1,
+                         Tmp4, Tmp2, Tmp3, Node->getOperand(4));
+    } else {
+      Tmp3 = DAG.getConstant(0, dl, Tmp2.getValueType());
+      Tmp4 = DAG.getCondCode(ISD::SETNE);
+      Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4,
+                         Tmp2, Tmp3, Node->getOperand(4));
+    }
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::BUILD_VECTOR:
+    Results.push_back(ExpandBUILD_VECTOR(Node));
+    break;
+  case ISD::SPLAT_VECTOR:
+    Results.push_back(ExpandSPLAT_VECTOR(Node));
+    break;
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::SHL: {
+    // Scalarize vector SRA/SRL/SHL.
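+    // e.g. a v4i32 SHL becomes four scalar SHLs on extracted element/shift
+    // pairs, recombined by the single BUILD_VECTOR below.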
+ EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Unable to legalize non-vector shift"); + assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); + unsigned NumElem = VT.getVectorNumElements(); + + SmallVector<SDValue, 8> Scalars; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(0), + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue Sh = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(1), + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, + VT.getScalarType(), Ex, Sh)); + } + + SDValue Result = DAG.getBuildVector(Node->getValueType(0), dl, Scalars); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + Results.push_back(TLI.expandVecReduce(Node, DAG)); + break; + case ISD::GLOBAL_OFFSET_TABLE: + case ISD::GlobalAddress: + case ISD::GlobalTLSAddress: + case ISD::ExternalSymbol: + case ISD::ConstantPool: + case ISD::JumpTable: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + // FIXME: Custom lowering for these operations shouldn't return null! + break; + } + + if (Results.empty() && Node->isStrictFPOpcode()) { + // FIXME: We were asked to expand a strict floating-point operation, + // but there is currently no expansion implemented that would preserve + // the "strict" properties. For now, we just fall back to the non-strict + // version if that is legal on the target. The actual mutation of the + // operation will happen in SelectionDAGISel::DoInstructionSelection. + switch (Node->getOpcode()) { + default: + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + return true; + break; + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + // These are registered by the operand type instead of the value + // type. Reflect that here. + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getOperand(1).getValueType()) + == TargetLowering::Legal) + return true; + break; + } + } + + // Replace the original node with the legalized result. + if (Results.empty()) { + LLVM_DEBUG(dbgs() << "Cannot expand node\n"); + return false; + } + + LLVM_DEBUG(dbgs() << "Successfully expanded node\n"); + ReplaceNode(Node, Results.data()); + return true; +} + +void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n"); + SmallVector<SDValue, 8> Results; + SDLoc dl(Node); + // FIXME: Check flags on the node to see if we can use a finite call. + bool CanUseFiniteLibCall = TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath; + unsigned Opc = Node->getOpcode(); + switch (Opc) { + case ISD::ATOMIC_FENCE: { + // If the target didn't lower this, lower it to '__sync_synchronize()' call + // FIXME: handle "fence singlethread" more efficiently. 
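+    // The runtime entry point is assumed to have the usual prototype:
+    //
+    //   void __sync_synchronize(void);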
+ TargetLowering::ArgListTy Args; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Node->getOperand(0)) + .setLibCallee( + CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + + Results.push_back(CallResult.second); + break; + } + // By default, atomic intrinsics are marked Legal and lowered. Targets + // which don't support them directly, however, may want libcalls, in which + // case they mark them Expand, and we get here. + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_CMP_SWAP: { + MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); + + std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + break; + } + case ISD::TRAP: { + // If this operation is not supported, lower it to 'abort()' call + TargetLowering::ArgListTy Args; + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Node->getOperand(0)) + .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol( + "abort", TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + + Results.push_back(CallResult.second); + break; + } + case ISD::FMINNUM: + case ISD::STRICT_FMINNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128)); + break; + case ISD::FMAXNUM: + case ISD::STRICT_FMAXNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128)); + break; + case ISD::FSQRT: + case ISD::STRICT_FSQRT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128)); + break; + case ISD::FCBRT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64, + RTLIB::CBRT_F80, RTLIB::CBRT_F128, + RTLIB::CBRT_PPCF128)); + break; + case ISD::FSIN: + case ISD::STRICT_FSIN: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128)); + break; + case ISD::FCOS: + case ISD::STRICT_FCOS: + Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128)); + break; + case ISD::FSINCOS: + // Expand into sincos libcall. 
+ ExpandSinCosLibCall(Node, Results); + break; + case ISD::FLOG: + case ISD::STRICT_FLOG: + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32, + RTLIB::LOG_FINITE_F64, + RTLIB::LOG_FINITE_F80, + RTLIB::LOG_FINITE_F128, + RTLIB::LOG_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); + break; + case ISD::FLOG2: + case ISD::STRICT_FLOG2: + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32, + RTLIB::LOG2_FINITE_F64, + RTLIB::LOG2_FINITE_F80, + RTLIB::LOG2_FINITE_F128, + RTLIB::LOG2_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128)); + break; + case ISD::FLOG10: + case ISD::STRICT_FLOG10: + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32, + RTLIB::LOG10_FINITE_F64, + RTLIB::LOG10_FINITE_F80, + RTLIB::LOG10_FINITE_F128, + RTLIB::LOG10_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128)); + break; + case ISD::FEXP: + case ISD::STRICT_FEXP: + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32, + RTLIB::EXP_FINITE_F64, + RTLIB::EXP_FINITE_F80, + RTLIB::EXP_FINITE_F128, + RTLIB::EXP_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); + break; + case ISD::FEXP2: + case ISD::STRICT_FEXP2: + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32, + RTLIB::EXP2_FINITE_F64, + RTLIB::EXP2_FINITE_F80, + RTLIB::EXP2_FINITE_F128, + RTLIB::EXP2_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); + break; + case ISD::FTRUNC: + case ISD::STRICT_FTRUNC: + Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128)); + break; + case ISD::FFLOOR: + case ISD::STRICT_FFLOOR: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128)); + break; + case ISD::FCEIL: + case ISD::STRICT_FCEIL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128)); + break; + case ISD::FRINT: + case ISD::STRICT_FRINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128)); + break; + case ISD::FNEARBYINT: + case ISD::STRICT_FNEARBYINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128)); + break; + case ISD::FROUND: + case ISD::STRICT_FROUND: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128)); + break; + case ISD::FPOWI: + case ISD::STRICT_FPOWI: + 
Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128)); + break; + case ISD::FPOW: + case ISD::STRICT_FPOW: + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32, + RTLIB::POW_FINITE_F64, + RTLIB::POW_FINITE_F80, + RTLIB::POW_FINITE_F128, + RTLIB::POW_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128)); + break; + case ISD::LROUND: + case ISD::STRICT_LROUND: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, + RTLIB::LROUND_F64, RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128)); + break; + case ISD::LLROUND: + case ISD::STRICT_LLROUND: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128)); + break; + case ISD::LRINT: + case ISD::STRICT_LRINT: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, + RTLIB::LRINT_F64, RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128)); + break; + case ISD::LLRINT: + case ISD::STRICT_LLRINT: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128)); + break; + case ISD::FDIV: + Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, + RTLIB::DIV_F80, RTLIB::DIV_F128, + RTLIB::DIV_PPCF128)); + break; + case ISD::FREM: + case ISD::STRICT_FREM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128)); + break; + case ISD::FMA: + case ISD::STRICT_FMA: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_F128, + RTLIB::FMA_PPCF128)); + break; + case ISD::FADD: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128)); + break; + case ISD::FMUL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, + RTLIB::MUL_F80, RTLIB::MUL_F128, + RTLIB::MUL_PPCF128)); + break; + case ISD::FP16_TO_FP: + if (Node->getValueType(0) == MVT::f32) { + Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); + } + break; + case ISD::FP_TO_FP16: { + RTLIB::Libcall LC = + RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); + Results.push_back(ExpandLibCall(LC, Node, false)); + break; + } + case ISD::FSUB: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, + RTLIB::SUB_F80, RTLIB::SUB_F128, + RTLIB::SUB_PPCF128)); + break; + case ISD::SREM: + Results.push_back(ExpandIntLibCall(Node, true, + RTLIB::SREM_I8, + RTLIB::SREM_I16, RTLIB::SREM_I32, + RTLIB::SREM_I64, RTLIB::SREM_I128)); + break; + case ISD::UREM: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::UREM_I8, + RTLIB::UREM_I16, RTLIB::UREM_I32, + RTLIB::UREM_I64, RTLIB::UREM_I128)); + break; + case ISD::SDIV: + Results.push_back(ExpandIntLibCall(Node, true, + RTLIB::SDIV_I8, + RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128)); + break; + case ISD::UDIV: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::UDIV_I8, + RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128)); + break; + case ISD::SDIVREM: + case ISD::UDIVREM: + // 
Expand into divrem libcall + ExpandDivRemLibCall(Node, Results); + break; + case ISD::MUL: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::MUL_I8, + RTLIB::MUL_I16, RTLIB::MUL_I32, + RTLIB::MUL_I64, RTLIB::MUL_I128)); + break; + case ISD::CTLZ_ZERO_UNDEF: + switch (Node->getSimpleValueType(0).SimpleTy) { + default: + llvm_unreachable("LibCall explicitly requested, but not available"); + case MVT::i32: + Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false)); + break; + case MVT::i64: + Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false)); + break; + case MVT::i128: + Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false)); + break; + } + break; + } + + // Replace the original node with the legalized result. + if (!Results.empty()) { + LLVM_DEBUG(dbgs() << "Successfully converted node to libcall\n"); + ReplaceNode(Node, Results.data()); + } else + LLVM_DEBUG(dbgs() << "Could not convert node to libcall\n"); +} + +// Determine the vector type to use in place of an original scalar element when +// promoting equally sized vectors. +static MVT getPromotedVectorElementType(const TargetLowering &TLI, + MVT EltVT, MVT NewEltVT) { + unsigned OldEltsPerNewElt = EltVT.getSizeInBits() / NewEltVT.getSizeInBits(); + MVT MidVT = MVT::getVectorVT(NewEltVT, OldEltsPerNewElt); + assert(TLI.isTypeLegal(MidVT) && "unexpected"); + return MidVT; +} + +void SelectionDAGLegalize::PromoteNode(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Trying to promote node\n"); + SmallVector<SDValue, 8> Results; + MVT OVT = Node->getSimpleValueType(0); + if (Node->getOpcode() == ISD::UINT_TO_FP || + Node->getOpcode() == ISD::SINT_TO_FP || + Node->getOpcode() == ISD::SETCC || + Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Node->getOpcode() == ISD::INSERT_VECTOR_ELT) { + OVT = Node->getOperand(0).getSimpleValueType(); + } + if (Node->getOpcode() == ISD::BR_CC) + OVT = Node->getOperand(2).getSimpleValueType(); + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); + SDLoc dl(Node); + SDValue Tmp1, Tmp2, Tmp3; + switch (Node->getOpcode()) { + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTPOP: + // Zero extend the argument. + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + if (Node->getOpcode() == ISD::CTTZ) { + // The count is the same in the promoted type except if the original + // value was zero. This can be handled by setting the bit just off + // the top of the original type. + auto TopBit = APInt::getOneBitSet(NVT.getSizeInBits(), + OVT.getSizeInBits()); + Tmp1 = DAG.getNode(ISD::OR, dl, NVT, Tmp1, + DAG.getConstant(TopBit, dl, NVT)); + } + // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is + // already the correct result. 
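+    // For CTLZ the promotion adds extra leading zeros that are subtracted
+    // back out below, e.g. an i16 value promoted to i32 over-counts by 16.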
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + if (Node->getOpcode() == ISD::CTLZ || + Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { + // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) + Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, + DAG.getConstant(NVT.getSizeInBits() - + OVT.getSizeInBits(), dl, NVT)); + } + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); + break; + case ISD::BITREVERSE: + case ISD::BSWAP: { + unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + Tmp1 = DAG.getNode( + ISD::SRL, dl, NVT, Tmp1, + DAG.getConstant(DiffBits, dl, + TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); + + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); + break; + } + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: + Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0), + Node->getOpcode() == ISD::FP_TO_SINT, dl); + Results.push_back(Tmp1); + break; + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: + Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0), + Node->getOpcode() == ISD::SINT_TO_FP, dl); + Results.push_back(Tmp1); + break; + case ISD::VAARG: { + SDValue Chain = Node->getOperand(0); // Get the chain. + SDValue Ptr = Node->getOperand(1); // Get the pointer. + + unsigned TruncOp; + if (OVT.isVector()) { + TruncOp = ISD::BITCAST; + } else { + assert(OVT.isInteger() + && "VAARG promotion is supported only for vectors or integer types"); + TruncOp = ISD::TRUNCATE; + } + + // Perform the larger operation, then convert back + Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2), + Node->getConstantOperandVal(3)); + Chain = Tmp1.getValue(1); + + Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1); + + // Modified the chain result - switch anything that used the old chain to + // use the new one. + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + if (UpdatedNodes) { + UpdatedNodes->insert(Tmp2.getNode()); + UpdatedNodes->insert(Chain.getNode()); + } + ReplacedNode(Node); + break; + } + case ISD::MUL: + case ISD::SDIV: + case ISD::SREM: + case ISD::UDIV: + case ISD::UREM: + case ISD::AND: + case ISD::OR: + case ISD::XOR: { + unsigned ExtOp, TruncOp; + if (OVT.isVector()) { + ExtOp = ISD::BITCAST; + TruncOp = ISD::BITCAST; + } else { + assert(OVT.isInteger() && "Cannot promote logic operation"); + + switch (Node->getOpcode()) { + default: + ExtOp = ISD::ANY_EXTEND; + break; + case ISD::SDIV: + case ISD::SREM: + ExtOp = ISD::SIGN_EXTEND; + break; + case ISD::UDIV: + case ISD::UREM: + ExtOp = ISD::ZERO_EXTEND; + break; + } + TruncOp = ISD::TRUNCATE; + } + // Promote each of the values to the new type. + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + // Perform the larger operation, then convert back + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1)); + break; + } + case ISD::UMUL_LOHI: + case ISD::SMUL_LOHI: { + // Promote to a multiply in a wider integer type. + unsigned ExtOp = Node->getOpcode() == ISD::UMUL_LOHI ? 
ISD::ZERO_EXTEND + : ISD::SIGN_EXTEND; + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::MUL, dl, NVT, Tmp1, Tmp2); + + auto &DL = DAG.getDataLayout(); + unsigned OriginalSize = OVT.getScalarSizeInBits(); + Tmp2 = DAG.getNode( + ISD::SRL, dl, NVT, Tmp1, + DAG.getConstant(OriginalSize, dl, TLI.getScalarShiftAmountTy(DL, NVT))); + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp2)); + break; + } + case ISD::SELECT: { + unsigned ExtOp, TruncOp; + if (Node->getValueType(0).isVector() || + Node->getValueType(0).getSizeInBits() == NVT.getSizeInBits()) { + ExtOp = ISD::BITCAST; + TruncOp = ISD::BITCAST; + } else if (Node->getValueType(0).isInteger()) { + ExtOp = ISD::ANY_EXTEND; + TruncOp = ISD::TRUNCATE; + } else { + ExtOp = ISD::FP_EXTEND; + TruncOp = ISD::FP_ROUND; + } + Tmp1 = Node->getOperand(0); + // Promote each of the values to the new type. + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); + // Perform the larger operation, then round down. + Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3); + Tmp1->setFlags(Node->getFlags()); + if (TruncOp != ISD::FP_ROUND) + Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); + else + Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1, + DAG.getIntPtrConstant(0, dl)); + Results.push_back(Tmp1); + break; + } + case ISD::VECTOR_SHUFFLE: { + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask(); + + // Cast the two input vectors. + Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1)); + + // Convert the shuffle mask to the right # elements. + Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask); + Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1); + Results.push_back(Tmp1); + break; + } + case ISD::SETCC: { + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(2))->get(); + ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + } + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1, + Tmp2, Node->getOperand(2), Node->getFlags())); + break; + } + case ISD::BR_CC: { + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(1))->get(); + ExtOp = isSignedIntSetCC(CCCode) ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + } + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3)); + Results.push_back(DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), + Node->getOperand(0), Node->getOperand(1), + Tmp1, Tmp2, Node->getOperand(4))); + break; + } + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FPOW: + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, + Node->getFlags()); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(0, dl))); + break; + case ISD::FMA: + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2)); + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, + DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3), + DAG.getIntPtrConstant(0, dl))); + break; + case ISD::FCOPYSIGN: + case ISD::FPOWI: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = Node->getOperand(1); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + + // fcopysign doesn't change anything but the sign bit, so + // (fp_round (fcopysign (fpext a), b)) + // is as precise as + // (fp_round (fpext a)) + // which is a no-op. Mark it as a TRUNCating FP_ROUND. + const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(isTrunc, dl))); + break; + } + case ISD::FFLOOR: + case ISD::FCEIL: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FROUND: + case ISD::FTRUNC: + case ISD::FNEG: + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FABS: + case ISD::FEXP: + case ISD::FEXP2: + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp2, DAG.getIntPtrConstant(0, dl))); + break; + case ISD::BUILD_VECTOR: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size + // + // e.g. v2i64 = build_vector i64:x, i64:y => v4i32 + // => + // v4i32 = concat_vectors (v2i32 (bitcast i64:x)), (v2i32 (bitcast i64:y)) + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for build_vector"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + + SmallVector<SDValue, 8> NewOps; + for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) { + SDValue Op = Node->getOperand(I); + NewOps.push_back(DAG.getNode(ISD::BITCAST, SDLoc(Op), MidVT, Op)); + } + + SDLoc SL(Node); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewOps); + SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat); + Results.push_back(CvtVec); + break; + } + case ISD::EXTRACT_VECTOR_ELT: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size. + // + // e.g. 
v2i64 = extract_vector_elt x:v2i64, y:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // + // i64 = bitcast + // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))), + // (i32 (extract_vector_elt castx, (2 * y + 1))) + // + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for extract_vector_elt"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + unsigned NewEltsPerOldElt = MidVT.getVectorNumElements(); + + SDValue Idx = Node->getOperand(1); + EVT IdxVT = Idx.getValueType(); + SDLoc SL(Node); + SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SL, IdxVT); + SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor); + + SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0)); + + SmallVector<SDValue, 8> NewOps; + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT); + SDValue TmpIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset); + + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT, + CastVec, TmpIdx); + NewOps.push_back(Elt); + } + + SDValue NewVec = DAG.getBuildVector(MidVT, SL, NewOps); + Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec)); + break; + } + case ISD::INSERT_VECTOR_ELT: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size + // + // e.g. v2i64 = insert_vector_elt x:v2i64, y:i64, z:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // v2i32:casty = bitcast y:i64 + // + // v2i64 = bitcast + // (v4i32 insert_vector_elt + // (v4i32 insert_vector_elt v4i32:castx, + // (extract_vector_elt casty, 0), 2 * z), + // (extract_vector_elt casty, 1), (2 * z + 1)) + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for insert_vector_elt"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + unsigned NewEltsPerOldElt = MidVT.getVectorNumElements(); + + SDValue Val = Node->getOperand(1); + SDValue Idx = Node->getOperand(2); + EVT IdxVT = Idx.getValueType(); + SDLoc SL(Node); + + SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SDLoc(), IdxVT); + SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor); + + SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0)); + SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val); + + SDValue NewVec = CastVec; + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT); + SDValue InEltIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset); + + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT, + CastVal, IdxOffset); + + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, NVT, + NewVec, Elt, InEltIdx); + } + + Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewVec)); + break; + } + case ISD::SCALAR_TO_VECTOR: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to different vector type with the same total bit size. + // + // e.g. 
v2i64 = scalar_to_vector x:i64 + // => + // concat_vectors (v2i32 bitcast x:i64), (v2i32 undef) + // + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + SDValue Val = Node->getOperand(0); + SDLoc SL(Node); + + SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val); + SDValue Undef = DAG.getUNDEF(MidVT); + + SmallVector<SDValue, 8> NewElts; + NewElts.push_back(CastVal); + for (unsigned I = 1, NElts = OVT.getVectorNumElements(); I != NElts; ++I) + NewElts.push_back(Undef); + + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewElts); + SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat); + Results.push_back(CvtVec); + break; + } + case ISD::ATOMIC_SWAP: { + AtomicSDNode *AM = cast<AtomicSDNode>(Node); + SDLoc SL(Node); + SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NVT, AM->getVal()); + assert(NVT.getSizeInBits() == OVT.getSizeInBits() && + "unexpected promotion type"); + assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() && + "unexpected atomic_swap with illegal type"); + + SDValue NewAtomic + = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, NVT, + DAG.getVTList(NVT, MVT::Other), + { AM->getChain(), AM->getBasePtr(), CastVal }, + AM->getMemOperand()); + Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic)); + Results.push_back(NewAtomic.getValue(1)); + break; + } + } + + // Replace the original node with the legalized result. + if (!Results.empty()) { + LLVM_DEBUG(dbgs() << "Successfully promoted node\n"); + ReplaceNode(Node, Results.data()); + } else + LLVM_DEBUG(dbgs() << "Could not promote node\n"); +} + +/// This is the entry point for the file. +void SelectionDAG::Legalize() { + AssignTopologicalOrder(); + + SmallPtrSet<SDNode *, 16> LegalizedNodes; + // Use a delete listener to remove nodes which were deleted during + // legalization from LegalizeNodes. This is needed to handle the situation + // where a new node is allocated by the object pool to the same address of a + // previously deleted node. + DAGNodeDeletedListener DeleteListener( + *this, + [&LegalizedNodes](SDNode *N, SDNode *E) { LegalizedNodes.erase(N); }); + + SelectionDAGLegalize Legalizer(*this, LegalizedNodes); + + // Visit all the nodes. We start in topological order, so that we see + // nodes with their original operands intact. Legalization can produce + // new nodes which may themselves need to be legalized. Iterate until all + // nodes have been legalized. + while (true) { + bool AnyLegalized = false; + for (auto NI = allnodes_end(); NI != allnodes_begin();) { + --NI; + + SDNode *N = &*NI; + if (N->use_empty() && N != getRoot().getNode()) { + ++NI; + DeleteNode(N); + continue; + } + + if (LegalizedNodes.insert(N).second) { + AnyLegalized = true; + Legalizer.LegalizeOp(N); + + if (N->use_empty() && N != getRoot().getNode()) { + ++NI; + DeleteNode(N); + } + } + } + if (!AnyLegalized) + break; + + } + + // Remove dead nodes now. + RemoveDeadNodes(); +} + +bool SelectionDAG::LegalizeOp(SDNode *N, + SmallSetVector<SDNode *, 16> &UpdatedNodes) { + SmallPtrSet<SDNode *, 16> LegalizedNodes; + SelectionDAGLegalize Legalizer(*this, LegalizedNodes, &UpdatedNodes); + + // Directly insert the node in question, and legalize it. This will recurse + // as needed through operands. 
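+  // If legalization ends up replacing N, ReplacedNode (defined earlier in
+  // this file) erases it from LegalizedNodes again, so the return value
+  // below reports whether N itself survived.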
+ LegalizedNodes.insert(N); + Legalizer.LegalizeOp(N); + + return LegalizedNodes.count(N); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp new file mode 100644 index 0000000000000..72d052473f115 --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -0,0 +1,2330 @@ +//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements float type expansion and softening for LegalizeTypes. +// Softening is the act of turning a computation in an illegal floating point +// type into a computation in an integer type of the same size; also known as +// "soft float". For example, turning f32 arithmetic into operations using i32. +// The resulting integer value is the same as what you would get by performing +// the floating point operation and bitcasting the result to the integer type. +// Expansion is the act of changing a computation in an illegal type to be a +// computation in two identical registers of a smaller type. For example, +// implementing ppcf128 arithmetic in two f64 registers. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "legalize-types" + +/// GetFPLibCall - Return the right libcall for the given floating point type. +static RTLIB::Libcall GetFPLibCall(EVT VT, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128) { + return + VT == MVT::f32 ? Call_F32 : + VT == MVT::f64 ? Call_F64 : + VT == MVT::f80 ? Call_F80 : + VT == MVT::f128 ? Call_F128 : + VT == MVT::ppcf128 ? 
Call_PPCF128 : + RTLIB::UNKNOWN_LIBCALL; +} + +//===----------------------------------------------------------------------===// +// Convert Float Results to Integer +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { + LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); + dbgs() << "\n"); + SDValue R = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SoftenFloatResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to soften the result of this operator!"); + + case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; + case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; + case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N); break; + case ISD::EXTRACT_VECTOR_ELT: + R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; + case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; + case ISD::FADD: R = SoftenFloatRes_FADD(N); break; + case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; + case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; + case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; + case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break; + case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break; + case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; + case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; + case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::FMA: R = SoftenFloatRes_FMA(N); break; + case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; + case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; + case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; + case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; + case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; + case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; + case ISD::FREM: R = SoftenFloatRes_FREM(N); break; + case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; + case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; + case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; + case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; + case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; + case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; + case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; + } + + // If R is null, the sub-method took care of registering the result. 
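+  // Otherwise remember the softened (integer) value so that later uses of
+  // this result can look it up.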
+ if (R.getNode()) { + assert(R.getNode() != N); + SetSoftenedFloat(SDValue(N, ResNo), R); + } +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { + return BitConvertToInteger(N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N, + unsigned ResNo) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + return BitConvertToInteger(Op); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { + // Convert the inputs to integers, and build a new pair out of them. + return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0)), + BitConvertToInteger(N->getOperand(0)), + BitConvertToInteger(N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N) { + ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N); + // In ppcf128, the high 64 bits are always first in memory regardless + // of Endianness. LLVM's APFloat representation is not Endian sensitive, + // and so always converts into a 128-bit APInt in a non-Endian-sensitive + // way. However, APInt's are serialized in an Endian-sensitive fashion, + // so on big-Endian targets, the two doubles are output in the wrong + // order. Fix this by manually flipping the order of the high 64 bits + // and the low 64 bits here. + if (DAG.getDataLayout().isBigEndian() && + CN->getValueType(0).getSimpleVT() == llvm::MVT::ppcf128) { + uint64_t words[2] = { CN->getValueAPF().bitcastToAPInt().getRawData()[1], + CN->getValueAPF().bitcastToAPInt().getRawData()[0] }; + APInt Val(128, words); + return DAG.getConstant(Val, SDLoc(CN), + TLI.getTypeToTransformTo(*DAG.getContext(), + CN->getValueType(0))); + } else { + return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN), + TLI.getTypeToTransformTo(*DAG.getContext(), + CN->getValueType(0))); + } +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) { + SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), + NewOp.getValueType().getVectorElementType(), + NewOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned Size = NVT.getSizeInBits(); + + // Mask = ~(1 << (Size-1)) + APInt API = APInt::getAllOnesValue(Size); + API.clearBit(Size - 1); + SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, + RTLIB::FMIN_F64, + RTLIB::FMIN_F80, + RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128), + NVT, Ops, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT 
OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, + RTLIB::FMAX_F64, + RTLIB::FMAX_F80, + RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128), + NVT, Ops, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_F128, + RTLIB::ADD_PPCF128), + NVT, Ops, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, + RTLIB::CEIL_F64, + RTLIB::CEIL_F80, + RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128), + NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(0)); + SDValue RHS = BitConvertToInteger(N->getOperand(1)); + SDLoc dl(N); + + EVT LVT = LHS.getValueType(); + EVT RVT = RHS.getValueType(); + + unsigned LSize = LVT.getSizeInBits(); + unsigned RSize = RVT.getSizeInBits(); + + // First get the sign bit of second operand. + SDValue SignBit = DAG.getNode( + ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT), + DAG.getConstant(RSize - 1, dl, + TLI.getShiftAmountTy(RVT, DAG.getDataLayout()))); + SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); + + // Shift right or sign-extend it if the two operands have different types. + int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); + if (SizeDiff > 0) { + SignBit = + DAG.getNode(ISD::SRL, dl, RVT, SignBit, + DAG.getConstant(SizeDiff, dl, + TLI.getShiftAmountTy(SignBit.getValueType(), + DAG.getDataLayout()))); + SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); + } else if (SizeDiff < 0) { + SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); + SignBit = + DAG.getNode(ISD::SHL, dl, LVT, SignBit, + DAG.getConstant(-SizeDiff, dl, + TLI.getShiftAmountTy(SignBit.getValueType(), + DAG.getDataLayout()))); + } + + // Clear the sign bit of the first operand. + SDValue Mask = DAG.getNode( + ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT), + DAG.getConstant(LSize - 1, dl, + TLI.getShiftAmountTy(LVT, DAG.getDataLayout()))); + Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT)); + LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); + + // Or the value with the sign bit. 
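+  // e.g. for two f32 operands this computes, in integer terms (illustrative):
+  //   (lhs & 0x7fffffff) | (rhs & 0x80000000)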
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, + RTLIB::COS_F64, + RTLIB::COS_F80, + RTLIB::COS_F128, + RTLIB::COS_PPCF128), + NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128), + NVT, Ops, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, + RTLIB::EXP_F64, + RTLIB::EXP_F80, + RTLIB::EXP_F128, + RTLIB::EXP_PPCF128), + NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, + RTLIB::EXP2_F64, + RTLIB::EXP2_F80, + RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128), + NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, + RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, + RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128), + NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, + RTLIB::LOG_F64, + RTLIB::LOG_F80, + RTLIB::LOG_F128, + RTLIB::LOG_PPCF128), + NVT, Op, CallOptions, 
SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, + RTLIB::LOG2_F64, + RTLIB::LOG2_F80, + RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128), + NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, + RTLIB::LOG10_F64, + RTLIB::LOG10_F80, + RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128), + NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)), + GetSoftenedFloat(N->getOperand(2)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[3] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType(), + N->getOperand(2).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_F128, + RTLIB::FMA_PPCF128), + NVT, Ops, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_F128, + RTLIB::MUL_PPCF128), + NVT, Ops, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128), + NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + + EVT FloatVT = N->getValueType(0); + if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) { + // Expand Y = FNEG(X) -> Y = X ^ sign mask + APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); + return 
DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)), + DAG.getConstant(SignMask, dl, NVT)); + } + + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)), + GetSoftenedFloat(N->getOperand(0)) }; + TargetLowering::MakeLibCallOptions CallOptions; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), + NVT, Ops, CallOptions, dl).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = N->getOperand(0); + + // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's + // entirely possible for both f16 and f32 to be legal, so use the fully + // hard-float FP_EXTEND rather than FP16_TO_FP. + if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) { + Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); + if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat) + AddToWorklist(Op.getNode()); + } + + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) { + Op = GetPromotedFloat(Op); + // If the promotion did the FP_EXTEND to the destination type for us, + // there's nothing left to do here. + if (Op.getValueType() == N->getValueType(0)) { + return BitConvertToInteger(Op); + } + } + + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; +} + +// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special +// nodes? +SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { + EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32); + SDValue Op = N->getOperand(0); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op, + CallOptions, SDLoc(N)).first; + if (N->getValueType(0) == MVT::f32) + return Res32; + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); + return TLI.makeLibCall(DAG, LC, NVT, Res32, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = N->getOperand(0); + if (N->getValueType(0) == MVT::f16) { + // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a + // storage-only type get a chance to select things. 
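+    // FP_TO_FP16 produces the raw i16 bits directly, which is already the
+    // softened form of an f16 value, so no libcall is needed on this path.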
+ return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op); + } + + RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, + RTLIB::POW_F64, + RTLIB::POW_F80, + RTLIB::POW_F128, + RTLIB::POW_PPCF128), + NVT, Ops, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { + assert(N->getOperand(1).getValueType() == MVT::i32 && + "Unsupported power type!"); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, + RTLIB::POWI_F64, + RTLIB::POWI_F80, + RTLIB::POWI_F128, + RTLIB::POWI_PPCF128), + NVT, Ops, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, + RTLIB::REM_F64, + RTLIB::REM_F80, + RTLIB::REM_F128, + RTLIB::REM_PPCF128), + NVT, Ops, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, + RTLIB::RINT_F64, + RTLIB::RINT_F80, + RTLIB::RINT_F128, + RTLIB::RINT_PPCF128), + NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, 
+ RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), + NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, + RTLIB::SIN_F64, + RTLIB::SIN_F80, + RTLIB::SIN_F128, + RTLIB::SIN_PPCF128), + NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, + RTLIB::SQRT_F64, + RTLIB::SQRT_F80, + RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128), + NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), + NVT, Ops, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + if (N->getValueType(0) == MVT::f16) + return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0)); + + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, + RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, + RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128), + NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { + LoadSDNode *L = cast<LoadSDNode>(N); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDLoc dl(N); + + auto MMOFlags = + L->getMemOperand()->getFlags() & + ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); + SDValue NewL; + if (L->getExtensionType() == ISD::NON_EXTLOAD) { + NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, + L->getChain(), L->getBasePtr(), L->getOffset(), + L->getPointerInfo(), NVT, L->getAlignment(), MMOFlags, + L->getAAInfo()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + if (N != NewL.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return NewL; + } + + // Do a non-extending load followed by FP_EXTEND. 
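+  // e.g. an extending load f32 -> f64 becomes (illustrative): load the f32
+  // value, FP_EXTEND it to f64, then bitcast the f64 to i64 for the
+  // softened result.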
+  NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, L->getMemoryVT(),
+                     dl, L->getChain(), L->getBasePtr(), L->getOffset(),
+                     L->getPointerInfo(), L->getMemoryVT(), L->getAlignment(),
+                     MMOFlags, L->getAAInfo());
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+  auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL);
+  return BitConvertToInteger(ExtendNode);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
+  SDValue LHS = GetSoftenedFloat(N->getOperand(1));
+  SDValue RHS = GetSoftenedFloat(N->getOperand(2));
+  return DAG.getSelect(SDLoc(N),
+                       LHS.getValueType(), N->getOperand(0), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
+  SDValue LHS = GetSoftenedFloat(N->getOperand(2));
+  SDValue RHS = GetSoftenedFloat(N->getOperand(3));
+  return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
+                     LHS.getValueType(), N->getOperand(0),
+                     N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
+  return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+                                               N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
+  SDValue Chain = N->getOperand(0); // Get the chain.
+  SDValue Ptr = N->getOperand(1); // Get the pointer.
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDLoc dl(N);
+
+  SDValue NewVAARG;
+  NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2),
+                          N->getConstantOperandVal(3));
+
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  if (N != NewVAARG.getValue(1).getNode())
+    ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+  return NewVAARG;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
+  bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+  EVT SVT = N->getOperand(0).getValueType();
+  EVT RVT = N->getValueType(0);
+  EVT NVT = EVT();
+  SDLoc dl(N);
+
+  // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to
+  // a larger type, eg: i8 -> fp. Even if it is legal, no libcall may exactly
+  // match. Look for an appropriate libcall.
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+       t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) {
+    NVT = (MVT::SimpleValueType)t;
+    // The source needs to be big enough to hold the operand.
+    if (NVT.bitsGE(SVT))
+      LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT) : RTLIB::getUINTTOFP(NVT, RVT);
+  }
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+  // Sign/zero extend the argument if the libcall takes a larger type.
+  SDValue Op = DAG.getNode(Signed ?
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + NVT, N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(Signed); + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, + TLI.getTypeToTransformTo(*DAG.getContext(), RVT), + Op, CallOptions, dl).first; +} + + +//===----------------------------------------------------------------------===// +// Convert Float Operand to Integer +//===----------------------------------------------------------------------===// + +bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { + LLVM_DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to soften this operator's operand!"); + + case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; + case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; + case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; + case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes + case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; + case ISD::LROUND: Res = SoftenFloatOp_LROUND(N); break; + case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break; + case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break; + case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break; + case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; + case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this to re-analyze. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand promotion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { + SDValue Op0 = GetSoftenedFloat(N->getOperand(0)); + + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { + // If we get here, the result must be legal but the source illegal. + EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + + if (SVT == MVT::f16) + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op); + + RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); + + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; +} + + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { + // We actually deal with the partially-softened FP_TO_FP16 node too, which + // returns an i16 so doesn't meet the constraints necessary for FP_ROUND. 
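+  // For example, an f64 -> f16 FP_TO_FP16 here turns into a call to the
+  // usual compiler-rt truncation routine (__truncdfhf2 under the common
+  // naming scheme; the target's runtime library may differ), whose i16
+  // result needs no further bit conversion.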
+  assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16);
+
+  EVT SVT = N->getOperand(0).getValueType();
+  EVT RVT = N->getValueType(0);
+  EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT;
+
+  RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  TargetLowering::MakeLibCallOptions CallOptions;
+  EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+  return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+
+  EVT VT = NewLHS.getValueType();
+  NewLHS = GetSoftenedFloat(NewLHS);
+  NewRHS = GetSoftenedFloat(NewRHS);
+  TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
+                          N->getOperand(2), N->getOperand(3));
+
+  // If softenSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (!NewRHS.getNode()) {
+    NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+                                        DAG.getCondCode(CCCode), NewLHS, NewRHS,
+                                        N->getOperand(4)),
+                 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
+  bool Signed = N->getOpcode() == ISD::FP_TO_SINT;
+  EVT SVT = N->getOperand(0).getValueType();
+  EVT RVT = N->getValueType(0);
+  EVT NVT = EVT();
+  SDLoc dl(N);
+
+  // If the result is not legal, eg: fp -> i1, then it needs to be promoted to
+  // a larger type, eg: fp -> i32. Even if it is legal, no libcall may exactly
+  // match, eg. we don't have fp -> i8 conversions.
+  // Look for an appropriate libcall.
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE;
+       IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
+       ++IntVT) {
+    NVT = (MVT::SimpleValueType)IntVT;
+    // The type needs to be big enough to hold the result.
+    if (NVT.bitsGE(RVT))
+      LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT)
+                  : RTLIB::getFPTOUINT(SVT, NVT);
+  }
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!");
+
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  TargetLowering::MakeLibCallOptions CallOptions;
+  EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+  SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first;
+
+  // Truncate the result if the libcall returns a larger type.
+  return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+  EVT VT = NewLHS.getValueType();
+  NewLHS = GetSoftenedFloat(NewLHS);
+  NewRHS = GetSoftenedFloat(NewRHS);
+  TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
+                          N->getOperand(0), N->getOperand(1));
+
+  // If softenSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
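+  // E.g. an f32 SETOLT becomes a single comparison libcall (__ltsf2 under
+  // the common soft-float naming) tested against zero; for predicates that
+  // need two libcalls (e.g. SETUEQ via __unordsf2 plus __eqsf2),
+  // softenSetCCOperands folds them into one boolean in NewLHS and clears
+  // NewRHS, which is what the check below handles.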
+ if (!NewRHS.getNode()) { + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)), + 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + + EVT VT = NewLHS.getValueType(); + NewLHS = GetSoftenedFloat(NewLHS); + NewRHS = GetSoftenedFloat(NewRHS); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(0), N->getOperand(1)); + + // If softenSetCCOperands returned a scalar, use it. + if (!NewRHS.getNode()) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), + 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + assert(OpNo == 1 && "Can only soften the stored value!"); + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Val = ST->getValue(); + SDLoc dl(N); + + if (ST->isTruncatingStore()) + // Do an FP_ROUND followed by a non-truncating store. + Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(), + Val, DAG.getIntPtrConstant(0, dl))); + else + Val = GetSoftenedFloat(Val); + + return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), + ST->getMemOperand()); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, + RTLIB::LROUND_F32, + RTLIB::LROUND_F64, + RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128), + NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, + RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, + RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128), + NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, + RTLIB::LRINT_F32, + RTLIB::LRINT_F64, + RTLIB::LRINT_F80, + 
RTLIB::LRINT_F128,
+                                           RTLIB::LRINT_PPCF128),
+                         NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  TargetLowering::MakeLibCallOptions CallOptions;
+  EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+  return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+                                           RTLIB::LLRINT_F32,
+                                           RTLIB::LLRINT_F64,
+                                           RTLIB::LLRINT_F80,
+                                           RTLIB::LLRINT_F128,
+                                           RTLIB::LLRINT_PPCF128),
+                         NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+//===----------------------------------------------------------------------===//
+// Float Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we
+/// just know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
+  LLVM_DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
+  SDValue Lo, Hi;
+  Lo = Hi = SDValue();
+
+  // See if the target wants to custom expand this node.
+  if (CustomLowerNode(N, N->getValueType(ResNo), true))
+    return;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "ExpandFloatResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to expand the result of this operator!");
+
+  case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
+  case ISD::SELECT:       SplitRes_SELECT(N, Lo, Hi); break;
+  case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
+
+  case ISD::MERGE_VALUES: ExpandRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+  case ISD::BITCAST:      ExpandRes_BITCAST(N, Lo, Hi); break;
+  case ISD::BUILD_PAIR:   ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+  case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+  case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+  case ISD::VAARG:        ExpandRes_VAARG(N, Lo, Hi); break;
+
+  case ISD::ConstantFP:   ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+  case ISD::FABS:         ExpandFloatRes_FABS(N, Lo, Hi); break;
+  case ISD::FMINNUM:      ExpandFloatRes_FMINNUM(N, Lo, Hi); break;
+  case ISD::FMAXNUM:      ExpandFloatRes_FMAXNUM(N, Lo, Hi); break;
+  case ISD::FADD:         ExpandFloatRes_FADD(N, Lo, Hi); break;
+  case ISD::FCEIL:        ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+  case ISD::FCOPYSIGN:    ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
+  case ISD::FCOS:         ExpandFloatRes_FCOS(N, Lo, Hi); break;
+  case ISD::FDIV:         ExpandFloatRes_FDIV(N, Lo, Hi); break;
+  case ISD::FEXP:         ExpandFloatRes_FEXP(N, Lo, Hi); break;
+  case ISD::FEXP2:        ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+  case ISD::FFLOOR:       ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+  case ISD::FLOG:         ExpandFloatRes_FLOG(N, Lo, Hi); break;
+  case ISD::FLOG2:        ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+  case ISD::FLOG10:       ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+  case ISD::FMA:          ExpandFloatRes_FMA(N, Lo, Hi); break;
+  case ISD::FMUL:         ExpandFloatRes_FMUL(N, Lo, Hi); break;
+  case ISD::FNEARBYINT:   ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
+  case ISD::FNEG:         ExpandFloatRes_FNEG(N, Lo, Hi); break;
+  case ISD::FP_EXTEND:    ExpandFloatRes_FP_EXTEND(N, Lo, Hi);
break; + case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; + case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; + case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; + case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break; + case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; + case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; + case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; + case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; + case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; + case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. + if (Lo.getNode()) + SetExpandedFloat(SDValue(N, ResNo), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + assert(NVT.getSizeInBits() == 64 && + "Do not know how to expand this float constant!"); + APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt(); + SDLoc dl(N); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(64, C.getRawData()[1])), + dl, NVT); + Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(64, C.getRawData()[0])), + dl, NVT); +} + +void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDLoc dl(N); + SDValue Tmp; + GetExpandedFloat(N->getOperand(0), Lo, Tmp); + Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp); + // Lo = Hi==fabs(Hi) ? Lo : -Lo; + Lo = DAG.getSelectCC(dl, Tmp, Hi, Lo, + DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo), + ISD::SETEQ); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::COPYSIGN_F32, + RTLIB::COPYSIGN_F64, + RTLIB::COPYSIGN_F80, + RTLIB::COPYSIGN_F128, + RTLIB::COPYSIGN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, RTLIB::COS_F64, + 
RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128), + N->getValueType(0), Ops, CallOptions, + SDLoc(N)).first; + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + TargetLowering::MakeLibCallOptions CallOptions; + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_F128, + RTLIB::FMA_PPCF128), + N->getValueType(0), Ops, CallOptions, + SDLoc(N)).first; + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_F128, + RTLIB::MUL_PPCF128), + N->getValueType(0), Ops, CallOptions, + SDLoc(N)).first; + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128), + N, 
false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + GetExpandedFloat(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0)); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), dl, NVT); +} + +void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), + N->getValueType(0), Ops, CallOptions, + SDLoc(N)).first; + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue 
&Lo, + SDValue &Hi) { + if (ISD::isNormalLoad(N)) { + ExpandRes_NormalLoad(N, Lo, Hi); + return; + } + + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + LoadSDNode *LD = cast<LoadSDNode>(N); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + SDLoc dl(N); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); + + Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, + LD->getMemoryVT(), LD->getMemOperand()); + + // Remember the chain. + Chain = Hi.getValue(1); + + // The low part is zero. + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), dl, NVT); + + // Modified the chain - switch anything that used the old chain to use the + // new one. + ReplaceValueWith(SDValue(LD, 1), Chain); +} + +void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!"); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Src = N->getOperand(0); + EVT SrcVT = Src.getValueType(); + bool isSigned = N->getOpcode() == ISD::SINT_TO_FP; + SDLoc dl(N); + + // First do an SINT_TO_FP, whether the original was signed or unsigned. + // When promoting partial word types to i32 we must honor the signedness, + // though. + if (SrcVT.bitsLE(MVT::i32)) { + // The integer can be represented exactly in an f64. + Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + MVT::i32, Src); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), dl, NVT); + Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src); + } else { + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (SrcVT.bitsLE(MVT::i64)) { + Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + MVT::i64, Src); + LC = RTLIB::SINTTOFP_I64_PPCF128; + } else if (SrcVT.bitsLE(MVT::i128)) { + Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src); + LC = RTLIB::SINTTOFP_I128_PPCF128; + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); + + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + Hi = TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl).first; + GetPairElements(Hi, Lo, Hi); + } + + if (isSigned) + return; + + // Unsigned - fix up the SINT_TO_FP value just calculated. + Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi); + SrcVT = Src.getValueType(); + + // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128. + static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 }; + static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 }; + static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 }; + ArrayRef<uint64_t> Parts; + + switch (SrcVT.getSimpleVT().SimpleTy) { + default: + llvm_unreachable("Unsupported UINT_TO_FP!"); + case MVT::i32: + Parts = TwoE32; + break; + case MVT::i64: + Parts = TwoE64; + break; + case MVT::i128: + Parts = TwoE128; + break; + } + + // TODO: Are there fast-math-flags to propagate to this FADD? 
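+  // The arrays above hold the IEEE double bit patterns of the correction
+  // addends: 0x41f0000000000000 is 2^32 (biased exponent 32 + 1023 = 0x41f,
+  // zero mantissa), 0x43f... is 2^64, and 0x47f... is 2^128; the second
+  // element, the low double of each ppcf128 constant, is zero.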
+ Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, + DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble(), + APInt(128, Parts)), + dl, MVT::ppcf128)); + Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT), + Lo, Hi, ISD::SETLT); + GetPairElements(Lo, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Float Operand Expansion +//===----------------------------------------------------------------------===// + +/// ExpandFloatOperand - This method is called when the specified operand of the +/// specified node is found to need expansion. At this point, all of the result +/// types of the node are known to be legal, but other operands of the node may +/// need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { + LLVM_DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Res = SDValue(); + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ExpandFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to expand this operator's operand!"); + + case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + + case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; + case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break; + case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; + case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; + case ISD::LROUND: Res = ExpandFloatOp_LROUND(N); break; + case ISD::LLROUND: Res = ExpandFloatOp_LLROUND(N); break; + case ISD::LRINT: Res = ExpandFloatOp_LRINT(N); break; + case ISD::LLRINT: Res = ExpandFloatOp_LLRINT(N); break; + case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break; + case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code +/// is shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, + SDValue &NewRHS, + ISD::CondCode &CCCode, + const SDLoc &dl) { + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedFloat(NewLHS, LHSLo, LHSHi); + GetExpandedFloat(NewRHS, RHSLo, RHSHi); + + assert(NewLHS.getValueType() == MVT::ppcf128 && "Unsupported setcc type!"); + + // FIXME: This generated code sucks. We want to generate + // FCMPU crN, hi1, hi2 + // BNE crN, L: + // FCMPU crN, lo1, lo2 + // The following can be improved, but not that much. 
+ SDValue Tmp1, Tmp2, Tmp3; + Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETOEQ); + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, CCCode); + Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); + Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETUNE); + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, CCCode); + Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); + NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3); + NewRHS = SDValue(); // LHS is the result, not a compare. +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (!NewRHS.getNode()) { + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)), 0); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) { + assert(N->getOperand(1).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Lo, Hi; + GetExpandedFloat(N->getOperand(1), Lo, Hi); + // The ppcf128 value is providing only the sign; take it from the + // higher-order double (which must have the larger magnitude). + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), + N->getValueType(0), N->getOperand(0), Hi); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Lo, Hi; + GetExpandedFloat(N->getOperand(0), Lo, Hi); + // Round it the rest of the way (e.g. to f32) if needed. + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), + N->getValueType(0), Hi, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { + EVT RVT = N->getValueType(0); + SDLoc dl(N); + + RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); + TargetLowering::MakeLibCallOptions CallOptions; + return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), CallOptions, dl).first; +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { + EVT RVT = N->getValueType(0); + SDLoc dl(N); + + RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + TargetLowering::MakeLibCallOptions CallOptions; + return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), + CallOptions, dl).first; +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. 
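+  // (Given the ppcf128-only expansion above, FloatExpandSetCCOperands always
+  // clears NewRHS, so for expanded operands this path is always taken.)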
+ if (!NewRHS.getNode()) { + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)), 0); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); + + // If ExpandSetCCOperands returned a scalar, use it. + if (!NewRHS.getNode()) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), 0); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { + if (ISD::isNormalStore(N)) + return ExpandOp_NormalStore(N, OpNo); + + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + StoreSDNode *ST = cast<StoreSDNode>(N); + + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), + ST->getValue().getValueType()); + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?"); + (void)NVT; + + SDValue Lo, Hi; + GetExpandedOp(ST->getValue(), Lo, Hi); + + return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr, + ST->getMemoryVT(), ST->getMemOperand()); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) { + EVT RVT = N->getValueType(0); + EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, + RTLIB::LROUND_F32, + RTLIB::LROUND_F64, + RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128), + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) { + EVT RVT = N->getValueType(0); + EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, + RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, + RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128), + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) { + EVT RVT = N->getValueType(0); + EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, + RTLIB::LRINT_F32, + RTLIB::LRINT_F64, + RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128), + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) { + EVT RVT = N->getValueType(0); + EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, + RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, + RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128), + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; +} + +//===----------------------------------------------------------------------===// +// Float Operand 
Promotion
+//===----------------------------------------------------------------------===//
+//
+
+static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
+  if (OpVT == MVT::f16) {
+    return ISD::FP16_TO_FP;
+  } else if (RetVT == MVT::f16) {
+    return ISD::FP_TO_FP16;
+  }
+
+  report_fatal_error("Attempt at an invalid promotion-related conversion");
+}
+
+bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
+  LLVM_DEBUG(dbgs() << "Promote float operand " << OpNo << ": "; N->dump(&DAG);
+             dbgs() << "\n");
+  SDValue R = SDValue();
+
+  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
+    LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
+    return false;
+  }
+
+  // Nodes that use a promotion-requiring floating point operand but don't
+  // produce a promotion-requiring floating point result need to be legalized
+  // to use the promoted float operand. Nodes that produce at least one
+  // promotion-requiring floating point result have their operands legalized as
+  // a part of PromoteFloatResult.
+  switch (N->getOpcode()) {
+    default:
+  #ifndef NDEBUG
+      dbgs() << "PromoteFloatOperand Op #" << OpNo << ": ";
+      N->dump(&DAG); dbgs() << "\n";
+  #endif
+      llvm_unreachable("Do not know how to promote this operator's operand!");
+
+    case ISD::BITCAST:    R = PromoteFloatOp_BITCAST(N, OpNo); break;
+    case ISD::FCOPYSIGN:  R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break;
+    case ISD::FP_TO_SINT:
+    case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break;
+    case ISD::FP_EXTEND:  R = PromoteFloatOp_FP_EXTEND(N, OpNo); break;
+    case ISD::SELECT_CC:  R = PromoteFloatOp_SELECT_CC(N, OpNo); break;
+    case ISD::SETCC:      R = PromoteFloatOp_SETCC(N, OpNo); break;
+    case ISD::STORE:      R = PromoteFloatOp_STORE(N, OpNo); break;
+  }
+
+  if (R.getNode())
+    ReplaceValueWith(SDValue(N, 0), R);
+  return false;
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo) {
+  SDValue Op = N->getOperand(0);
+  EVT OpVT = Op->getValueType(0);
+
+  SDValue Promoted = GetPromotedFloat(N->getOperand(0));
+  EVT PromotedVT = Promoted->getValueType(0);
+
+  // Convert the promoted float value to the desired IVT.
+  EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+  SDValue Convert = DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N),
+                                IVT, Promoted);
+  // The final result type might not be a scalar so we need a bitcast. The
+  // bitcast will be further legalized if needed.
+  return DAG.getBitcast(N->getValueType(0), Convert);
+}
+
+// Promote Operand 1 of FCOPYSIGN. Operand 0 ought to be handled by
+// PromoteFloatRes_FCOPYSIGN.
+SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 1 && "Only Operand 1 must need promotion here");
+  SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+
+  return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+                     N->getOperand(0), Op1);
+}
+
+// Convert the promoted float value to the desired integer type
+SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) {
+  SDValue Op = GetPromotedFloat(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) {
+  SDValue Op = GetPromotedFloat(N->getOperand(0));
+  EVT VT = N->getValueType(0);
+
+  // Desired VT is same as promoted type. Use promoted float directly.
+  if (VT == Op->getValueType(0))
+    return Op;
+
+  // Else, extend the promoted float value to the desired VT.
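+  // E.g. for an f16 operand feeding an fp_extend to f64: the operand was
+  // already promoted to f32, so only the remaining f32 -> f64 step is
+  // emitted here.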
+  return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Op);
+}
+
+// Promote the float operands used for comparison. The true- and false-
+// operands have the same type as the result and are promoted, if needed, by
+// PromoteFloatRes_SELECT_CC
+SDValue DAGTypeLegalizer::PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+  SDValue LHS = GetPromotedFloat(N->getOperand(0));
+  SDValue RHS = GetPromotedFloat(N->getOperand(1));
+
+  return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
+                     LHS, RHS, N->getOperand(2), N->getOperand(3),
+                     N->getOperand(4));
+}
+
+// Construct a SETCC that compares the promoted values and sets the conditional
+// code.
+SDValue DAGTypeLegalizer::PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo) {
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+  SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+  return DAG.getSetCC(SDLoc(N), NVT, Op0, Op1, CCCode);
+}
+
+// Lower the promoted float down to the integer value of the same size and
+// construct a STORE of the integer value.
+SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) {
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  SDValue Val = ST->getValue();
+  SDLoc DL(N);
+
+  SDValue Promoted = GetPromotedFloat(Val);
+  EVT VT = ST->getOperand(1).getValueType();
+  EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+
+  SDValue NewVal;
+  NewVal = DAG.getNode(GetPromotionOpcode(Promoted.getValueType(), VT), DL,
+                       IVT, Promoted);
+
+  return DAG.getStore(ST->getChain(), DL, NewVal, ST->getBasePtr(),
+                      ST->getMemOperand());
+}
+
+//===----------------------------------------------------------------------===//
+// Float Result Promotion
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
+  LLVM_DEBUG(dbgs() << "Promote float result " << ResNo << ": "; N->dump(&DAG);
+             dbgs() << "\n");
+  SDValue R = SDValue();
+
+  // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) { + LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n"); + return; + } + + switch (N->getOpcode()) { + // These opcodes cannot appear if promotion of FP16 is done in the backend + // instead of Clang + case ISD::FP16_TO_FP: + case ISD::FP_TO_FP16: + default: +#ifndef NDEBUG + dbgs() << "PromoteFloatResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to promote this operator's result!"); + + case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break; + case ISD::ConstantFP: R = PromoteFloatRes_ConstantFP(N); break; + case ISD::EXTRACT_VECTOR_ELT: + R = PromoteFloatRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::FCOPYSIGN: R = PromoteFloatRes_FCOPYSIGN(N); break; + + // Unary FP Operations + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FRINT: + case ISD::FROUND: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break; + + // Binary FP Operations + case ISD::FADD: + case ISD::FDIV: + case ISD::FMAXIMUM: + case ISD::FMINIMUM: + case ISD::FMAXNUM: + case ISD::FMINNUM: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: R = PromoteFloatRes_BinOp(N); break; + + case ISD::FMA: // FMA is same as FMAD + case ISD::FMAD: R = PromoteFloatRes_FMAD(N); break; + + case ISD::FPOWI: R = PromoteFloatRes_FPOWI(N); break; + + case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break; + case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break; + case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break; + + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break; + case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break; + case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; + } + + if (R.getNode()) + SetPromotedFloat(SDValue(N, ResNo), R); +} + +// Bitcast from i16 to f16: convert the i16 to a f32 value instead. +// At this point, it is not possible to determine if the bitcast value is +// eventually stored to memory or promoted to f32 or promoted to a floating +// point at a higher precision. Some of these cases are handled by FP_EXTEND, +// STORE promotion handlers. +SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + // Input type isn't guaranteed to be a scalar int so bitcast if not. The + // bitcast will be legalized further if necessary. 
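+  // Sketch for the common i16 -> f16 case: the bitcast below is then a
+  // no-op, and FP16_TO_FP widens the value, so the "f16" result lives in
+  // an f32 register from here on.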
+  EVT IVT = EVT::getIntegerVT(*DAG.getContext(),
+                              N->getOperand(0).getValueType().getSizeInBits());
+  SDValue Cast = DAG.getBitcast(IVT, N->getOperand(0));
+  return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, Cast);
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_ConstantFP(SDNode *N) {
+  ConstantFPSDNode *CFPNode = cast<ConstantFPSDNode>(N);
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+
+  // Get the (bit-cast) APInt of the APFloat and build an integer constant
+  EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+  SDValue C = DAG.getConstant(CFPNode->getValueAPF().bitcastToAPInt(), DL,
+                              IVT);
+
+  // Convert the Constant to the desired FP type
+  // FIXME We might be able to do the conversion during compilation and get rid
+  // of it from the object code
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, C);
+}
+
+// If the Index operand is a constant, try to redirect the extract operation to
+// the correct legalized vector. If not, bit-convert the input vector to an
+// equivalent integer vector. Extract the element as a (bit-cast) integer
+// value and convert it to the promoted type.
+SDValue DAGTypeLegalizer::PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDLoc DL(N);
+
+  // If the index is constant, try to extract the value from the legalized
+  // vector type.
+  if (isa<ConstantSDNode>(N->getOperand(1))) {
+    SDValue Vec = N->getOperand(0);
+    SDValue Idx = N->getOperand(1);
+    EVT VecVT = Vec->getValueType(0);
+    EVT EltVT = VecVT.getVectorElementType();
+
+    uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+    switch (getTypeAction(VecVT)) {
+    default: break;
+    case TargetLowering::TypeScalarizeVector: {
+      SDValue Res = GetScalarizedVector(N->getOperand(0));
+      ReplaceValueWith(SDValue(N, 0), Res);
+      return SDValue();
+    }
+    case TargetLowering::TypeWidenVector: {
+      Vec = GetWidenedVector(Vec);
+      SDValue Res = DAG.getNode(N->getOpcode(), DL, EltVT, Vec, Idx);
+      ReplaceValueWith(SDValue(N, 0), Res);
+      return SDValue();
+    }
+    case TargetLowering::TypeSplitVector: {
+      SDValue Lo, Hi;
+      GetSplitVector(Vec, Lo, Hi);
+
+      uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+      SDValue Res;
+      if (IdxVal < LoElts)
+        Res = DAG.getNode(N->getOpcode(), DL, EltVT, Lo, Idx);
+      else
+        Res = DAG.getNode(N->getOpcode(), DL, EltVT, Hi,
+                          DAG.getConstant(IdxVal - LoElts, DL,
+                                          Idx.getValueType()));
+      ReplaceValueWith(SDValue(N, 0), Res);
+      return SDValue();
+    }
+    }
+  }
+
+  // Bit-convert the input vector to the equivalent integer vector
+  SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+  EVT IVT = NewOp.getValueType().getVectorElementType();
+
+  // Extract the element as a (bit-cast) integer value
+  SDValue NewVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IVT,
+                               NewOp, N->getOperand(1));
+
+  // Convert the element to the desired FP type
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, NewVal);
+}
+
+// FCOPYSIGN(X, Y) returns the value of X with the sign of Y. If the result
+// needs promotion, so does the argument X. Note that Y, if needed, will be
+// handled during operand promotion.
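+// For instance, with copysign(f16 %x, f64 %y), only %x is replaced by its
+// promoted f32 value below; %y keeps its own type, since FCOPYSIGN only
+// consumes the sign bit of its second operand and tolerates a type mismatch
+// there.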
+SDValue DAGTypeLegalizer::PromoteFloatRes_FCOPYSIGN(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + + SDValue Op1 = N->getOperand(1); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); +} + +// Unary operation where the result and the operand have PromoteFloat type +// action. Construct a new SDNode with the promoted float value of the old +// operand. +SDValue DAGTypeLegalizer::PromoteFloatRes_UnaryOp(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op = GetPromotedFloat(N->getOperand(0)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op); +} + +// Binary operations where the result and both operands have PromoteFloat type +// action. Construct a new SDNode with the promoted float values of the old +// operands. +SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags()); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + SDValue Op2 = GetPromotedFloat(N->getOperand(2)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, Op2); +} + +// Promote the Float (first) operand and retain the Integer (second) operand +SDValue DAGTypeLegalizer::PromoteFloatRes_FPOWI(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = N->getOperand(1); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); +} + +// Explicit operation to reduce precision. Reduce the value to half precision +// and promote it back to the legal type. +SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) { + SDLoc DL(N); + + SDValue Op = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT OpVT = Op->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + + // Round promoted float to desired precision + SDValue Round = DAG.getNode(GetPromotionOpcode(OpVT, VT), DL, IVT, Op); + // Promote it back to the legal output type + return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, Round); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) { + LoadSDNode *L = cast<LoadSDNode>(N); + EVT VT = N->getValueType(0); + + // Load the value as an integer value with the same number of bits. 
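+  // E.g. an f16 load becomes an i16 integer load reusing the original memory
+  // operand flags, followed by FP16_TO_FP to produce the promoted f32 value.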
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT, + SDLoc(N), L->getChain(), L->getBasePtr(), + L->getOffset(), L->getPointerInfo(), IVT, + L->getAlignment(), + L->getMemOperand()->getFlags(), + L->getAAInfo()); + // Legalize the chain result by replacing uses of the old value chain with the + // new one + ReplaceValueWith(SDValue(N, 1), newL.getValue(1)); + + // Convert the integer value to the desired FP type + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, newL); +} + +// Construct a new SELECT node with the promoted true- and false- values. +SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT(SDNode *N) { + SDValue TrueVal = GetPromotedFloat(N->getOperand(1)); + SDValue FalseVal = GetPromotedFloat(N->getOperand(2)); + + return DAG.getNode(ISD::SELECT, SDLoc(N), TrueVal->getValueType(0), + N->getOperand(0), TrueVal, FalseVal); +} + +// Construct a new SELECT_CC node with the promoted true- and false- values. +// The operands used for comparison are promoted by PromoteFloatOp_SELECT_CC. +SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) { + SDValue TrueVal = GetPromotedFloat(N->getOperand(2)); + SDValue FalseVal = GetPromotedFloat(N->getOperand(3)); + + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), + TrueVal.getNode()->getValueType(0), N->getOperand(0), + N->getOperand(1), TrueVal, FalseVal, N->getOperand(4)); +} + +// Construct a SDNode that transforms the SINT or UINT operand to the promoted +// float type. +SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue NV = DAG.getNode(N->getOpcode(), DL, NVT, N->getOperand(0)); + // Round the value to the desired precision (that of the source type). + return DAG.getNode( + ISD::FP_EXTEND, DL, NVT, + DAG.getNode(ISD::FP_ROUND, DL, VT, NV, DAG.getIntPtrConstant(0, DL))); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + + AtomicSDNode *AM = cast<AtomicSDNode>(N); + SDLoc SL(N); + + SDValue CastVal = BitConvertToInteger(AM->getVal()); + EVT CastVT = CastVal.getValueType(); + + SDValue NewAtomic + = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, CastVT, + DAG.getVTList(CastVT, MVT::Other), + { AM->getChain(), AM->getBasePtr(), CastVal }, + AM->getMemOperand()); + + SDValue ResultCast = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT, + NewAtomic); + // Legalize the chain result by replacing uses of the old value chain with the + // new one + ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1)); + + return ResultCast; + +} + diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp new file mode 100644 index 0000000000000..d5c1b539adbde --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -0,0 +1,4313 @@ +//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer type expansion and promotion for LegalizeTypes.
+// Promotion is the act of changing a computation in an illegal type into a
+// computation in a larger type. For example, implementing i8 arithmetic in an
+// i32 register (often needed on powerpc).
+// Expansion is the act of changing a computation in an illegal type into a
+// computation in two identical registers of a smaller type. For example,
+// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
+// targets).
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+//===----------------------------------------------------------------------===//
+// Integer Result Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerResult - This method is called when a result of a node is
+/// found to be in need of promotion to a larger type. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// expansion; we just know that (at least) one result needs promotion.
+void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
+  LLVM_DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG);
+             dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  // See if the target wants to custom expand this node.
+  if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
+    LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n");
+    return;
+  }
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to promote this operator!");
+  case ISD::MERGE_VALUES: Res = PromoteIntRes_MERGE_VALUES(N, ResNo); break;
+  case ISD::AssertSext:  Res = PromoteIntRes_AssertSext(N); break;
+  case ISD::AssertZext:  Res = PromoteIntRes_AssertZext(N); break;
+  case ISD::BITCAST:     Res = PromoteIntRes_BITCAST(N); break;
+  case ISD::BITREVERSE:  Res = PromoteIntRes_BITREVERSE(N); break;
+  case ISD::BSWAP:       Res = PromoteIntRes_BSWAP(N); break;
+  case ISD::BUILD_PAIR:  Res = PromoteIntRes_BUILD_PAIR(N); break;
+  case ISD::Constant:    Res = PromoteIntRes_Constant(N); break;
+  case ISD::CTLZ_ZERO_UNDEF:
+  case ISD::CTLZ:        Res = PromoteIntRes_CTLZ(N); break;
+  case ISD::CTPOP:       Res = PromoteIntRes_CTPOP(N); break;
+  case ISD::CTTZ_ZERO_UNDEF:
+  case ISD::CTTZ:        Res = PromoteIntRes_CTTZ(N); break;
+  case ISD::EXTRACT_VECTOR_ELT:
+                         Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
+  case ISD::LOAD:        Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
+  case ISD::MLOAD:       Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));
+    break;
+  case ISD::MGATHER:     Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N));
+    break;
+  case ISD::SELECT:      Res = PromoteIntRes_SELECT(N); break;
+  case ISD::VSELECT:     Res = PromoteIntRes_VSELECT(N); break;
+  case ISD::SELECT_CC:   Res = PromoteIntRes_SELECT_CC(N); break;
+  case ISD::SETCC:       Res = PromoteIntRes_SETCC(N); break;
+  case ISD::SMIN:
+  case ISD::SMAX:        Res = PromoteIntRes_SExtIntBinOp(N); break;
+  case ISD::UMIN:
+  case ISD::UMAX:        Res =
PromoteIntRes_ZExtIntBinOp(N); break; + + case ISD::SHL: Res = PromoteIntRes_SHL(N); break; + case ISD::SIGN_EXTEND_INREG: + Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; + case ISD::SRA: Res = PromoteIntRes_SRA(N); break; + case ISD::SRL: Res = PromoteIntRes_SRL(N); break; + case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break; + case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; + case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break; + + case ISD::EXTRACT_SUBVECTOR: + Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break; + case ISD::VECTOR_SHUFFLE: + Res = PromoteIntRes_VECTOR_SHUFFLE(N); break; + case ISD::INSERT_VECTOR_ELT: + Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break; + case ISD::BUILD_VECTOR: + Res = PromoteIntRes_BUILD_VECTOR(N); break; + case ISD::SCALAR_TO_VECTOR: + Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break; + case ISD::SPLAT_VECTOR: + Res = PromoteIntRes_SPLAT_VECTOR(N); break; + case ISD::CONCAT_VECTORS: + Res = PromoteIntRes_CONCAT_VECTORS(N); break; + + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + Res = PromoteIntRes_EXTEND_VECTOR_INREG(N); break; + + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; + + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; + + case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break; + + case ISD::FLT_ROUNDS_: Res = PromoteIntRes_FLT_ROUNDS(N); break; + + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; + + case ISD::SDIV: + case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break; + + case ISD::UDIV: + case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break; + + case ISD::SADDO: + case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break; + case ISD::UADDO: + case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break; + case ISD::SMULO: + case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break; + + case ISD::ADDE: + case ISD::SUBE: + case ISD::ADDCARRY: + case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break; + + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::SSUBSAT: + case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break; + + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break; + + case ISD::ABS: Res = PromoteIntRes_ABS(N); break; + + case ISD::ATOMIC_LOAD: + Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break; + + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_SWAP: + Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break; + + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: + Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo); + break; + + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + Res = PromoteIntRes_VECREDUCE(N); + break; + } + + // If the result is null 
then the sub-method took care of registering it. + if (Res.getNode()) + SetPromotedInteger(SDValue(N, ResNo), Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N, + unsigned ResNo) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + return GetPromotedInteger(Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) { + // Sign-extend the new bits, and continue the assertion. + SDValue Op = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::AssertSext, SDLoc(N), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) { + // Zero the new bits, and continue the assertion. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::AssertZext, SDLoc(N), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { + EVT ResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), + N->getMemoryVT(), ResVT, + N->getChain(), N->getBasePtr(), + N->getMemOperand()); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), + N->getMemoryVT(), + N->getChain(), N->getBasePtr(), + Op2, N->getMemOperand()); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, + unsigned ResNo) { + if (ResNo == 1) { + assert(N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); + EVT SVT = getSetCCResultType(N->getOperand(2).getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); + + // Only use the result of getSetCCResultType if it is legal, + // otherwise just use the promoted result type (NVT). + if (!TLI.isTypeLegal(SVT)) + SVT = NVT; + + SDVTList VTs = DAG.getVTList(N->getValueType(0), SVT, MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs, + N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3), + N->getMemOperand()); + ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); + ReplaceValueWith(SDValue(N, 2), Res.getValue(2)); + return Res.getValue(1); + } + + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Op3 = GetPromotedInteger(N->getOperand(3)); + SDVTList VTs = + DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(), + N->getBasePtr(), Op2, Op3, N->getMemOperand()); + // Update the use to N with the newly created Res. 
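+ // Illustrative example (not part of this patch): an i8 cmpxchg on a target + // where only i32 is legal keeps its i8 memory VT; only the register values + // are widened: + // before legalization: i8 = ATOMIC_CMP_SWAP<i8> ptr, i8 cmp, i8 new + // after promotion: i32 = ATOMIC_CMP_SWAP<i8> ptr, i32 cmp, i32 new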
+ for (unsigned i = 1, NumResults = N->getNumValues(); i < NumResults; ++i) + ReplaceValueWith(SDValue(N, i), Res.getValue(i)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + SDLoc dl(N); + + switch (getTypeAction(InVT)) { + case TargetLowering::TypeLegal: + break; + case TargetLowering::TypePromoteInteger: + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector()) + // The input promotes to the same size. Convert the promoted value. + return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); + break; + case TargetLowering::TypeSoftenFloat: + // Promote the integer operand by hand. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); + case TargetLowering::TypePromoteFloat: { + // Convert the promoted float by hand. + if (!NOutVT.isVector()) + return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, GetPromotedFloat(InOp)); + break; + } + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: + break; + case TargetLowering::TypeScalarizeVector: + // Convert the element to an integer and promote it by hand. + if (!NOutVT.isVector()) + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + BitConvertToInteger(GetScalarizedVector(InOp))); + break; + case TargetLowering::TypeSplitVector: { + if (!NOutVT.isVector()) { + // For example, i32 = BITCAST v2i16 on alpha. Convert the split + // pieces of the input into integers and reassemble in the final type. + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + InOp = DAG.getNode(ISD::ANY_EXTEND, dl, + EVT::getIntegerVT(*DAG.getContext(), + NOutVT.getSizeInBits()), + JoinIntegers(Lo, Hi)); + return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); + } + break; + } + case TargetLowering::TypeWidenVector: + // The input is widened to the same size. Convert to the widened value. + // Make sure that the outgoing value is not a vector, because this would + // make us bitcast between two vectors which are legalized in different ways. + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) + return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp)); + // If the output type is also a vector and widening it to the same size + // as the widened input type would be a legal type, we can widen the bitcast + // and handle the promotion after. 
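+ // Illustrative example (not part of this patch): with OutVT = v2i8 and an + // input widened to twice its original size, WideOutVT below is v4i8; if + // v4i8 is legal on the target, the widened input is bitcast to v4i8, the + // low v2i8 half is extracted with EXTRACT_SUBVECTOR, and only then + // any-extended to NOutVT.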
+ if (NOutVT.isVector()) { + unsigned WidenInSize = NInVT.getSizeInBits(); + unsigned OutSize = OutVT.getSizeInBits(); + if (WidenInSize % OutSize == 0) { + unsigned Scale = WidenInSize / OutSize; + EVT WideOutVT = EVT::getVectorVT(*DAG.getContext(), + OutVT.getVectorElementType(), + OutVT.getVectorNumElements() * Scale); + if (isTypeLegal(WideOutVT)) { + InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp)); + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp, + DAG.getConstant(0, dl, IdxTy)); + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp); + } + } + } + } + + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + CreateStackStoreLoad(InOp, OutVT)); +} + +// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount +// in the VT returned by getShiftAmountTy and to return a safe VT if we can't. +static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT, + const TargetLowering &TLI, + SelectionDAG &DAG) { + EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + // If the value won't fit in the preferred type, just use something safe. It + // will be legalized when the shift is expanded. + if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits()) + ShiftVT = MVT::i32; + return ShiftVT; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + SDLoc dl(N); + + unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); + EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG); + return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), + DAG.getConstant(DiffBits, dl, ShiftVT)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + SDLoc dl(N); + + unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); + EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG); + return DAG.getNode(ISD::SRL, dl, NVT, + DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), + DAG.getConstant(DiffBits, dl, ShiftVT)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { + // The pair element type may be legal, or may not promote to the same type as + // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0)), JoinIntegers(N->getOperand(0), + N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { + EVT VT = N->getValueType(0); + // FIXME there is no actual debug info here + SDLoc dl(N); + // Zero extend things like i1, sign extend everything else. It shouldn't + // matter in theory which one we pick, but this tends to give better code. + unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + SDValue Result = DAG.getNode(Opc, dl, + TLI.getTypeToTransformTo(*DAG.getContext(), VT), + SDValue(N, 0)); + assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?"); + return Result; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { + // Zero extend to the promoted type and do the count there.
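+ // Illustrative example (not part of this patch), for i8 ctlz promoted to + // i32: + // before legalization: i8 = ctlz i8 %x + // after promotion: i32 = sub (ctlz (zext i32 %x)), 24 + // The zero extension adds 24 extra leading zeros, which are subtracted off.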
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + SDLoc dl(N); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + Op = DAG.getNode(N->getOpcode(), dl, NVT, Op); + // Subtract off the extra leading bits in the bigger type. + return DAG.getNode( + ISD::SUB, dl, NVT, Op, + DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, + NVT)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { + // Zero extend to the promoted type and do the count there. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::CTPOP, SDLoc(N), Op.getValueType(), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + SDLoc dl(N); + if (N->getOpcode() == ISD::CTTZ) { + // The count is the same in the promoted type except if the original + // value was zero. This can be handled by setting the bit just off + // the top of the original type. + auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(), + OVT.getScalarSizeInBits()); + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT)); + } + return DAG.getNode(N->getOpcode(), dl, NVT, Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { + SDLoc dl(N); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + // If the input also needs to be promoted, do that first so we can get a + // good idea for the output type. + if (TLI.getTypeAction(*DAG.getContext(), Op0.getValueType()) + == TargetLowering::TypePromoteInteger) { + SDValue In = GetPromotedInteger(Op0); + + // If the new type is larger than NVT, use it. We probably won't need to + // promote it again. + EVT SVT = In.getValueType().getScalarType(); + if (SVT.bitsGE(NVT)) { + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, In, Op1); + return DAG.getAnyExtOrTrunc(Ext, dl, NVT); + } + } + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, Op0, Op1); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned NewOpc = N->getOpcode(); + SDLoc dl(N); + + // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is + // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT + // and SINT conversions are Custom, there is no way to tell which is + // preferable. We choose SINT because that's the right thing on PPC.) + if (N->getOpcode() == ISD::FP_TO_UINT && + !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) + NewOpc = ISD::FP_TO_SINT; + + if (N->getOpcode() == ISD::STRICT_FP_TO_UINT && + !TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) + NewOpc = ISD::STRICT_FP_TO_SINT; + + SDValue Res; + if (N->isStrictFPOpcode()) { + Res = DAG.getNode(NewOpc, dl, { NVT, MVT::Other }, + { N->getOperand(0), N->getOperand(1) }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + } else + Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + + // Assert that the converted value fits in the original type.
If it doesn't + // (eg: because the value being converted is too big), then the result of the + // original operation was undefined anyway, so the assert is still correct. + // + // NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example: + // before legalization: fp-to-uint16, 65534. -> 0xfffe + // after legalization: fp-to-sint32, 65534. -> 0x0000fffe + return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT || + N->getOpcode() == ISD::STRICT_FP_TO_UINT) ? + ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, + DAG.getValueType(N->getValueType(0).getScalarType())); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + + return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + + return DAG.getNode(N->getOpcode(), dl, NVT); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + + if (getTypeAction(N->getOperand(0).getValueType()) + == TargetLowering::TypePromoteInteger) { + SDValue Res = GetPromotedInteger(N->getOperand(0)); + assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!"); + + // If the result and operand types are the same after promotion, simplify + // to an in-register extension. + if (NVT == Res.getValueType()) { + // The high bits are not guaranteed to be anything. Insert an extend. + if (N->getOpcode() == ISD::SIGN_EXTEND) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, + DAG.getValueType(N->getOperand(0).getValueType())); + if (N->getOpcode() == ISD::ZERO_EXTEND) + return DAG.getZeroExtendInReg(Res, dl, + N->getOperand(0).getValueType().getScalarType()); + assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!"); + return Res; + } + } + + // Otherwise, just extend the original operand all the way to the larger type. + return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + ISD::LoadExtType ExtType = + ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); + SDLoc dl(N); + SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), + N->getMemoryVT(), N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue ExtPassThru = GetPromotedInteger(N->getPassThru()); + + SDLoc dl(N); + SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), + N->getMask(), ExtPassThru, N->getMemoryVT(), + N->getMemOperand(), ISD::EXTLOAD); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
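+ // Illustrative example (not part of this patch): a masked load of v4i8 whose + // result promotes to v4i32 becomes an any-extending masked load (EXTLOAD + // above) that still reads only v4i8 from memory.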
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue ExtPassThru = GetPromotedInteger(N->getPassThru()); + assert(NVT == ExtPassThru.getValueType() && + "Gather result type and the passThru argument type should be the same"); + + SDLoc dl(N); + SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(), + N->getIndex(), N->getScale() }; + SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), + N->getMemoryVT(), dl, Ops, + N->getMemOperand(), N->getIndexType()); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +/// Promote the overflow flag of an overflowing arithmetic node. +SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { + // Change the return type of the boolean result while obeying + // getSetCCResultType. + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); + EVT VT = N->getValueType(0); + EVT SVT = getSetCCResultType(VT); + SDValue Ops[3] = { N->getOperand(0), N->getOperand(1) }; + unsigned NumOps = N->getNumOperands(); + assert(NumOps <= 3 && "Too many operands"); + if (NumOps == 3) + Ops[2] = N->getOperand(2); + + SDLoc dl(N); + SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, SVT), + makeArrayRef(Ops, NumOps)); + + // Modified the sum result - switch anything that used the old sum to use + // the new one. + ReplaceValueWith(SDValue(N, 0), Res); + + // Convert to the expected type. + return DAG.getBoolExtOrTrunc(Res.getValue(1), dl, NVT, VT); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) { + // If the promoted type is legal, we can convert this to: + // 1. ANY_EXTEND iN to iM + // 2. SHL by M-N + // 3. [US][ADD|SUB]SAT + // 4. L/ASHR by M-N + // Else it is more efficient to convert this to a min and a max + // operation in the higher precision arithmetic.
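+ // Illustrative example (not part of this patch), for i8 saddsat promoted to + // i32 when i32 saddsat is legal: + // before legalization: i8 = saddsat i8 %a, i8 %b + // after promotion: i32 = ashr (saddsat (shl %a, 24), (shl %b, 24)), 24 + // Shifting the operands into the top bits makes the i32 saturation points + // coincide with the i8 ones.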
+ SDLoc dl(N); + SDValue Op1 = N->getOperand(0); + SDValue Op2 = N->getOperand(1); + unsigned OldBits = Op1.getScalarValueSizeInBits(); + + unsigned Opcode = N->getOpcode(); + + SDValue Op1Promoted, Op2Promoted; + if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) { + Op1Promoted = ZExtPromotedInteger(Op1); + Op2Promoted = ZExtPromotedInteger(Op2); + } else { + Op1Promoted = SExtPromotedInteger(Op1); + Op2Promoted = SExtPromotedInteger(Op2); + } + EVT PromotedType = Op1Promoted.getValueType(); + unsigned NewBits = PromotedType.getScalarSizeInBits(); + + if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) { + unsigned ShiftOp; + switch (Opcode) { + case ISD::SADDSAT: + case ISD::SSUBSAT: + ShiftOp = ISD::SRA; + break; + case ISD::UADDSAT: + case ISD::USUBSAT: + ShiftOp = ISD::SRL; + break; + default: + llvm_unreachable("Expected opcode to be signed or unsigned saturation " + "addition or subtraction"); + } + + unsigned SHLAmount = NewBits - OldBits; + EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); + SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); + Op1Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); + Op2Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); + + SDValue Result = + DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); + } else { + if (Opcode == ISD::USUBSAT) { + SDValue Max = + DAG.getNode(ISD::UMAX, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ISD::SUB, dl, PromotedType, Max, Op2Promoted); + } + + if (Opcode == ISD::UADDSAT) { + APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits); + SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); + SDValue Add = + DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax); + } + + unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB; + APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits); + APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits); + SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType); + SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); + SDValue Result = + DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted); + Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax); + Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin); + return Result; + } +} + +SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { + // Can just promote the operands then continue with the operation. + SDLoc dl(N); + SDValue Op1Promoted, Op2Promoted; + bool Signed = + N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT; + bool Saturating = + N->getOpcode() == ISD::SMULFIXSAT || N->getOpcode() == ISD::UMULFIXSAT; + if (Signed) { + Op1Promoted = SExtPromotedInteger(N->getOperand(0)); + Op2Promoted = SExtPromotedInteger(N->getOperand(1)); + } else { + Op1Promoted = ZExtPromotedInteger(N->getOperand(0)); + Op2Promoted = ZExtPromotedInteger(N->getOperand(1)); + } + EVT OldType = N->getOperand(0).getValueType(); + EVT PromotedType = Op1Promoted.getValueType(); + unsigned DiffSize = + PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits(); + + if (Saturating) { + // Promoting the operand and result values changes the saturation width, + // which extends the values that we clamp to on saturation.
This could be + // resolved by shifting one of the operands the same amount, which would + // also shift the result we compare against, then shifting back. + EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); + Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, + DAG.getConstant(DiffSize, dl, ShiftTy)); + SDValue Result = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, + Op2Promoted, N->getOperand(2)); + unsigned ShiftOp = Signed ? ISD::SRA : ISD::SRL; + return DAG.getNode(ShiftOp, dl, PromotedType, Result, + DAG.getConstant(DiffSize, dl, ShiftTy)); + } + return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted, + N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // The operation overflowed iff the result in the larger type is not the + // sign extension of its truncation to the original type. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = LHS.getValueType(); + SDLoc dl(N); + + // Do the arithmetic in the larger type. + unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB; + SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS); + + // Calculate the overflow flag: sign extend the arithmetic result from + // the original type. + SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, + DAG.getValueType(OVT)); + // Overflowed if and only if this is not equal to Res. + Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(1)); + SDValue RHS = GetPromotedInteger(N->getOperand(2)); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { + SDValue Mask = N->getOperand(0); + + SDValue LHS = GetPromotedInteger(N->getOperand(1)); + SDValue RHS = GetPromotedInteger(N->getOperand(2)); + return DAG.getNode(ISD::VSELECT, SDLoc(N), + LHS.getValueType(), Mask, LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(2)); + SDValue RHS = GetPromotedInteger(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), + LHS.getValueType(), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { + EVT InVT = N->getOperand(0).getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + + EVT SVT = getSetCCResultType(InVT); + + // If we got back a type that needs to be promoted, this likely means the + // input type also needs to be promoted. So get the promoted type for + // the input and try the query again. + if (getTypeAction(SVT) == TargetLowering::TypePromoteInteger) { + if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) { + InVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + SVT = getSetCCResultType(InVT); + } else { + // Input type isn't promoted, just use the default promoted type.
+ SVT = NVT; + } + } + + SDLoc dl(N); + assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && + "Vector compare must return a vector result!"); + + // Get the SETCC result using the canonical SETCC type. + SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); + + // Convert to the expected type. + return DAG.getSExtOrTrunc(SetCC, dl, NVT); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(0)); + SDValue RHS = N->getOperand(1); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); + return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { + // The input may have strange things in the top bits of the registers, but + // these operations don't care. They may have weird bits going out, but + // that too is okay if they are integer operations. + SDValue LHS = GetPromotedInteger(N->getOperand(0)); + SDValue RHS = GetPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) { + // Sign extend the input. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) { + // Zero extend the input. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { + // The input value must be properly sign extended. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = N->getOperand(1); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); + return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { + // The input value must be properly zero extended. 
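+ // Illustrative example (not part of this patch): for i8 %x = 0x80, + // (srl %x, 1) must produce 0x40. If the promoted i32 register held + // sign-extended garbage in bits 8-31, an i32 srl would shift that garbage + // into the low byte, so the input is zero extended first.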
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = N->getOperand(1); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); + return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Res; + SDValue InOp = N->getOperand(0); + SDLoc dl(N); + + switch (getTypeAction(InOp.getValueType())) { + default: llvm_unreachable("Unknown type action!"); + case TargetLowering::TypeLegal: + case TargetLowering::TypeExpandInteger: + Res = InOp; + break; + case TargetLowering::TypePromoteInteger: + Res = GetPromotedInteger(InOp); + break; + case TargetLowering::TypeSplitVector: { + EVT InVT = InOp.getValueType(); + assert(InVT.isVector() && "Cannot split scalar types"); + unsigned NumElts = InVT.getVectorNumElements(); + assert(NumElts == NVT.getVectorNumElements() && + "Dst and Src must have the same number of elements"); + assert(isPowerOf2_32(NumElts) && + "Promoted vector type must be a power of two"); + + SDValue EOp1, EOp2; + GetSplitVector(InOp, EOp1, EOp2); + + EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(), + NumElts/2); + EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1); + EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2); + } + case TargetLowering::TypeWidenVector: { + SDValue WideInOp = GetWidenedVector(InOp); + + // Truncate widened InOp. + unsigned NumElem = WideInOp.getValueType().getVectorNumElements(); + EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), + N->getValueType(0).getScalarType(), NumElem); + SDValue WideTrunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, WideInOp); + + // Zero extend so that the elements are of same type as those of NVT + EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), NVT.getVectorElementType(), + NumElem); + SDValue WideExt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, WideTrunc); + + // Extract the low NVT subvector. + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SDValue ZeroIdx = DAG.getConstant(0, dl, IdxTy); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, WideExt, ZeroIdx); + } + } + + // Truncate to NVT instead of VT + return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // The operation overflowed iff the result in the larger type is not the + // zero extension of its truncation to the original type. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = LHS.getValueType(); + SDLoc dl(N); + + // Do the arithmetic in the larger type. + unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; + SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS); + + // Calculate the overflow flag: zero extend the arithmetic result from + // the original type. + SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT.getScalarType()); + // Overflowed if and only if this is not equal to Res. + Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); + + return Res; +} + +// Handle promotion for the ADDE/SUBE/ADDCARRY/SUBCARRY nodes. 
Notice that + // the third operand of ADDE/SUBE nodes is a carry flag, which differs from + // the ADDCARRY/SUBCARRY nodes in that the third operand is a carry Boolean. +SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // We need to sign-extend the operands so the carry value computed by the + // wide operation will be equivalent to the carry value computed by the + // narrow operation. + // An ADDCARRY can generate a carry only if any of the operands has its + // most significant bit set. Sign extension propagates the most significant + // bit into the higher bits which means the extra bit that the narrow + // addition would need (i.e. the carry) will be propagated through the higher + // bits of the wide addition. + // A SUBCARRY can generate a borrow only if LHS < RHS and this property will + // be preserved by sign extension. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + + EVT ValueVTs[] = {LHS.getValueType(), N->getValueType(1)}; + + // Do the arithmetic in the wide type. + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), DAG.getVTList(ValueVTs), + LHS, RHS, N->getOperand(2)); + + // Update the users of the original carry/borrow value. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + + return SDValue(Res.getNode(), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) { + SDValue Op0 = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { + // Promote the overflow bit trivially. + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); + SDLoc DL(N); + EVT SmallVT = LHS.getValueType(); + + // To determine if the result overflowed in a larger type, we extend the + // input to the larger type, do the multiply (checking if it overflows), + // then also check the high bits of the result to see if overflow happened + // there. + if (N->getOpcode() == ISD::SMULO) { + LHS = SExtPromotedInteger(LHS); + RHS = SExtPromotedInteger(RHS); + } else { + LHS = ZExtPromotedInteger(LHS); + RHS = ZExtPromotedInteger(RHS); + } + SDVTList VTs = DAG.getVTList(LHS.getValueType(), N->getValueType(1)); + SDValue Mul = DAG.getNode(N->getOpcode(), DL, VTs, LHS, RHS); + + // Overflow occurred if it occurred in the larger type, or if the high part + // of the result does not zero/sign-extend the low part. Check this second + // possibility first. + SDValue Overflow; + if (N->getOpcode() == ISD::UMULO) { + // Unsigned overflow occurred if the high part is non-zero. + unsigned Shift = SmallVT.getScalarSizeInBits(); + EVT ShiftTy = getShiftAmountTyForConstant(Shift, Mul.getValueType(), + TLI, DAG); + SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, + DAG.getConstant(Shift, DL, ShiftTy)); + Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, + DAG.getConstant(0, DL, Hi.getValueType()), + ISD::SETNE); + } else { + // Signed overflow occurred if the high part does not sign extend the low. + SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(), + Mul, DAG.getValueType(SmallVT)); + Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE); + } + + // The only other way for overflow to occur is if the multiplication in the + // larger type itself overflowed.
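+ // Illustrative example (not part of this patch): for i8 umulo promoted to + // i32, 200 * 2 = 400 (0x190) does not overflow the i32 multiply, but the + // bits above bit 7 are non-zero, so the check above correctly reports i8 + // overflow.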
+ Overflow = DAG.getNode(ISD::OR, DL, N->getValueType(1), Overflow, + SDValue(Mul.getNode(), 1)); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Overflow); + return Mul; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { + SDValue Chain = N->getOperand(0); // Get the chain. + SDValue Ptr = N->getOperand(1); // Get the pointer. + EVT VT = N->getValueType(0); + SDLoc dl(N); + + MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT); + // The argument is passed as NumRegs registers of type RegVT. + + SmallVector<SDValue, 8> Parts(NumRegs); + for (unsigned i = 0; i < NumRegs; ++i) { + Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2), + N->getConstantOperandVal(3)); + Chain = Parts[i].getValue(1); + } + + // Handle endianness of the load. + if (DAG.getDataLayout().isBigEndian()) + std::reverse(Parts.begin(), Parts.end()); + + // Assemble the parts in the promoted type. + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]); + for (unsigned i = 1; i < NumRegs; ++i) { + SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]); + // Shift it to the right position and "or" it in. + Part = DAG.getNode(ISD::SHL, dl, NVT, Part, + DAG.getConstant(i * RegVT.getSizeInBits(), dl, + TLI.getPointerTy(DAG.getDataLayout()))); + Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part); + } + + // Modified the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Chain); + + return Res; +} + +//===----------------------------------------------------------------------===// +// Integer Operand Promotion +//===----------------------------------------------------------------------===// + +/// PromoteIntegerOperand - This method is called when the specified operand of +/// the specified node is found to need promotion. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need promotion or expansion as well as the specified one. 
+bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { + LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) { + LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n"); + return false; + } + + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; + #endif + llvm_unreachable("Do not know how to promote this operator's operand!"); + + case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break; + case ISD::ATOMIC_STORE: + Res = PromoteIntOp_ATOMIC_STORE(cast<AtomicSDNode>(N)); + break; + case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break; + case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break; + case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break; + case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break; + case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break; + case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::INSERT_VECTOR_ELT: + Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; + case ISD::SCALAR_TO_VECTOR: + Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; + case ISD::SPLAT_VECTOR: + Res = PromoteIntOp_SPLAT_VECTOR(N); break; + case ISD::VSELECT: + case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break; + case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break; + case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break; + case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break; + case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; + case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N), + OpNo); break; + case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N), + OpNo); break; + case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N), + OpNo); break; + case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N), + OpNo); break; + case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; + case ISD::FP16_TO_FP: + case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; + case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: Res = PromoteIntOp_Shift(N); break; + + case ISD::ADDCARRY: + case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break; + + case ISD::FRAMEADDR: + case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break; + + case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break; + + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: Res = PromoteIntOp_MULFIX(N); break; + + case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break; + + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break; + } + + // If the result is null, the sub-method took care of registering results etc. 
+ if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// PromoteSetCCOperands - Promote the operands of a comparison. This code is +/// shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode CCCode) { + // We have to insert explicit sign or zero extends. Note that we could + // insert sign extends for ALL conditions. For those operations where either + // zero or sign extension would be valid, use SExtOrZExtPromotedInteger + // which will choose the cheapest for the target. + switch (CCCode) { + default: llvm_unreachable("Unknown integer comparison!"); + case ISD::SETEQ: + case ISD::SETNE: { + SDValue OpL = GetPromotedInteger(NewLHS); + SDValue OpR = GetPromotedInteger(NewRHS); + + // We would prefer to promote the comparison operands with sign extension. + // If the width of OpL/OpR, excluding the duplicated sign bits, is no + // greater than the width of NewLHS/NewRHS, we can avoid inserting a real + // truncate instruction, which would eventually be redundant. + unsigned OpLEffectiveBits = + OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1; + unsigned OpREffectiveBits = + OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1; + if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() && + OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) { + NewLHS = OpL; + NewRHS = OpR; + } else { + NewLHS = SExtOrZExtPromotedInteger(NewLHS); + NewRHS = SExtOrZExtPromotedInteger(NewRHS); + } + break; + } + case ISD::SETUGE: + case ISD::SETUGT: + case ISD::SETULE: + case ISD::SETULT: + NewLHS = SExtOrZExtPromotedInteger(NewLHS); + NewRHS = SExtOrZExtPromotedInteger(NewRHS); + break; + case ISD::SETGE: + case ISD::SETGT: + case ISD::SETLT: + case ISD::SETLE: + NewLHS = SExtPromotedInteger(NewLHS); + NewRHS = SExtPromotedInteger(NewRHS); + break; + } +} + +SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), + N->getChain(), N->getBasePtr(), Op2, N->getMemOperand()); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) { + // This should only occur in unusual situations like bitcasting to an + // x86_fp80, so just turn it into a store+load. + return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) { + assert(OpNo == 2 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(2); + SDValue RHS = N->getOperand(3); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get()); + + // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always + // legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)), + 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "only know how to promote condition"); + + // Promote all the way up to the canonical SetCC type. + SDValue Cond = PromoteTargetBoolean(N->getOperand(1), MVT::Other); + + // The chain (Op#0) and basic block destination (Op#2) are always legal types. + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond, + N->getOperand(2)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { + // Since the result type is legal, the operands must promote to it. + EVT OVT = N->getOperand(0).getValueType(); + SDValue Lo = ZExtPromotedInteger(N->getOperand(0)); + SDValue Hi = GetPromotedInteger(N->getOperand(1)); + assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?"); + SDLoc dl(N); + + Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi, + DAG.getConstant(OVT.getSizeInBits(), dl, + TLI.getPointerTy(DAG.getDataLayout()))); + return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { + // The vector type is legal but the element type is not. This implies + // that the vector is a power-of-two in length and that the element + // type does not have a strange size (eg: it is not i1). + EVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) && + "Legal vector of one illegal element?"); + + // Promote the inserted value. The type does not need to match the + // vector element type. Check that any extra bits introduced will be + // truncated away. + assert(N->getOperand(0).getValueSizeInBits() >= + N->getValueType(0).getScalarSizeInBits() && + "Type of inserted value narrower than vector element type!"); + + SmallVector<SDValue, 16> NewOps; + for (unsigned i = 0; i < NumElts; ++i) + NewOps.push_back(GetPromotedInteger(N->getOperand(i))); + + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, + unsigned OpNo) { + if (OpNo == 1) { + // Promote the inserted value. This is valid because the type does not + // have to match the vector element type. + + // Check that any extra bits introduced will be truncated away. + assert(N->getOperand(1).getValueSizeInBits() >= + N->getValueType(0).getScalarSizeInBits() && + "Type of inserted value narrower than vector element type!"); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + GetPromotedInteger(N->getOperand(1)), + N->getOperand(2)), + 0); + } + + assert(OpNo == 2 && "Different operand and result vector types?"); + + // Promote the index. + SDValue Idx = DAG.getZExtOrTrunc(N->getOperand(2), SDLoc(N), + TLI.getVectorIdxTy(DAG.getDataLayout())); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + N->getOperand(1), Idx), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { + // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote + // the operand in place. + return SDValue(DAG.UpdateNodeOperands(N, + GetPromotedInteger(N->getOperand(0))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) { + // Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the + // operand in place. 
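+ // Illustrative example (not part of this patch): splatting an i8 value into + // a vector of i8 elements passes the scalar as a promoted i32; the extra + // high bits are harmless because the operand is implicitly truncated back + // to i8.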
+ return SDValue( + DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Only know how to promote the condition!"); + SDValue Cond = N->getOperand(0); + EVT OpTy = N->getOperand(1).getValueType(); + + if (N->getOpcode() == ISD::VSELECT) + if (SDValue Res = WidenVSELECTAndMask(N)) + return Res; + + // Promote all the way up to the canonical SetCC type. + EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy; + Cond = PromoteTargetBoolean(Cond, OpVT); + + return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1), + N->getOperand(2)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get()); + + // The CC (#4) and the possible return values (#2 and #3) have legal types. + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2), + N->getOperand(3), N->getOperand(4)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get()); + + // The CC (#2) is always legal. + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + ZExtPromotedInteger(N->getOperand(1))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + SDLoc dl(N); + Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), + Op, DAG.getValueType(N->getOperand(0).getValueType())); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, + SExtPromotedInteger(N->getOperand(0))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); + SDLoc dl(N); + + SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. + + // Truncate the value and store the result. + return DAG.getTruncStore(Ch, dl, Val, Ptr, + N->getMemoryVT(), N->getMemOperand()); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, + unsigned OpNo) { + + SDValue DataOp = N->getValue(); + EVT DataVT = DataOp.getValueType(); + SDValue Mask = N->getMask(); + SDLoc dl(N); + + bool TruncateStore = false; + if (OpNo == 3) { + Mask = PromoteTargetBoolean(Mask, DataVT); + // Update in place. 
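+ // Illustrative note (not part of this patch): PromoteTargetBoolean extends + // the mask according to the target's boolean contents (e.g. all-ones for + // "true" on targets using zero-or-negative-one vector masks) before it is + // spliced back into the operand list.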
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + NewOps[3] = Mask; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); + } else { // Data operand + assert(OpNo == 1 && "Unexpected operand for promotion"); + DataOp = GetPromotedInteger(DataOp); + TruncateStore = true; + } + + return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask, + N->getMemoryVT(), N->getMemOperand(), + TruncateStore, N->isCompressingStore()); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, + unsigned OpNo) { + assert(OpNo == 2 && "Only know how to promote the mask!"); + EVT DataVT = N->getValueType(0); + SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + NewOps[OpNo] = Mask; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, + unsigned OpNo) { + + SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); + if (OpNo == 2) { + // The Mask + EVT DataVT = N->getValueType(0); + NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else if (OpNo == 4) { + // The Index + if (N->isIndexSigned()) + // Need to sign extend the index since the bits will likely be used. + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + else + NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo)); + } else + NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); + + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, + unsigned OpNo) { + SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); + if (OpNo == 2) { + // The Mask + EVT DataVT = N->getValue().getValueType(); + NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else if (OpNo == 4) { + // The Index + if (N->isIndexSigned()) + // Need to sign extend the index since the bits will likely be used. 
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + else + NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo)); + } else + NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, + ZExtPromotedInteger(N->getOperand(0))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { + SDLoc dl(N); + SDValue Op = GetPromotedInteger(N->getOperand(0)); + Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); + return DAG.getZeroExtendInReg(Op, dl, + N->getOperand(0).getValueType().getScalarType()); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) { + assert(OpNo == 2 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDLoc DL(N); + + Carry = PromoteTargetBoolean(Carry, LHS.getValueType()); + + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MULFIX(SDNode *N) { + SDValue Op2 = ZExtPromotedInteger(N->getOperand(2)); + return SDValue( + DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_FRAMERETURNADDR(SDNode *N) { + // Promote the RETURNADDR/FRAMEADDR argument to a supported integer width. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return SDValue(DAG.UpdateNodeOperands(N, Op), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) { + assert(OpNo > 1 && "Don't know how to promote this operand!"); + // Promote the rw, locality, and cache type arguments to a supported integer + // width. + SDValue Op2 = ZExtPromotedInteger(N->getOperand(2)); + SDValue Op3 = ZExtPromotedInteger(N->getOperand(3)); + SDValue Op4 = ZExtPromotedInteger(N->getOperand(4)); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), + Op2, Op3, Op4), + 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) { + SDValue Op = SExtPromotedInteger(N->getOperand(1)); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) { + SDLoc dl(N); + SDValue Op; + switch (N->getOpcode()) { + default: llvm_unreachable("Expected integer vector reduction"); + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + Op = GetPromotedInteger(N->getOperand(0)); + break; + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + Op = SExtPromotedInteger(N->getOperand(0)); + break; + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + Op = ZExtPromotedInteger(N->getOperand(0)); + break; + } + + EVT EltVT = Op.getValueType().getVectorElementType(); + EVT VT = N->getValueType(0); + if (VT.bitsGE(EltVT)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op); + + // Result size must be >= element size. If this is not the case after + // promotion, also promote the result type and then truncate. 
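+ // Illustrative example (not part of this patch): i8 = vecreduce_smax v4i8 + // where v4i8 promotes to v4i32. The operands were sign extended above, so + // reducing in i32 picks the same element and truncating back to i8 recovers + // the original result.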
+ SDValue Reduce = DAG.getNode(N->getOpcode(), dl, EltVT, Op); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce); +} + +//===----------------------------------------------------------------------===// +// Integer Result Expansion +//===----------------------------------------------------------------------===// + +/// ExpandIntegerResult - This method is called when the specified result of the +/// specified node is found to need expansion. At this point, the node may also +/// have invalid operands or may have other results that need promotion, we just +/// know that (at least) one result needs expansion. +void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { + LLVM_DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); + dbgs() << "\n"); + SDValue Lo, Hi; + Lo = Hi = SDValue(); + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ExpandIntegerResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + report_fatal_error("Do not know how to expand the result of this " + "operator!"); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + + case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break; + case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; + case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; + case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break; + + case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break; + case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break; + case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; + case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break; + case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; + case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; + case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break; + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break; + case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; + case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break; + case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; + case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; + case ISD::LLROUND: ExpandIntRes_LLROUND(N, Lo, Hi); break; + case ISD::LLRINT: ExpandIntRes_LLRINT(N, Lo, Hi); break; + case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; + case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; + case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break; + case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break; + case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break; + case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break; + case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break; + case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break; + case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break; + case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break; + case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: ExpandIntRes_ATOMIC_LOAD(N, Lo, Hi); break; + + case ISD::ATOMIC_LOAD_ADD: + 
case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_CMP_SWAP: { + std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N); + SplitInteger(Tmp.first, Lo, Hi); + ReplaceValueWith(SDValue(N, 1), Tmp.second); + break; + } + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { + AtomicSDNode *AN = cast<AtomicSDNode>(N); + SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::Other); + SDValue Tmp = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs, + N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), + AN->getMemOperand()); + + // Expanding to the strong ATOMIC_CMP_SWAP node means we can determine + // success simply by comparing the loaded value against the ingoing + // comparison. + SDValue Success = DAG.getSetCC(SDLoc(N), N->getValueType(1), Tmp, + N->getOperand(2), ISD::SETEQ); + + SplitInteger(Tmp, Lo, Hi); + ReplaceValueWith(SDValue(N, 1), Success); + ReplaceValueWith(SDValue(N, 2), Tmp.getValue(1)); + break; + } + + case ISD::AND: + case ISD::OR: + case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break; + + case ISD::UMAX: + case ISD::SMAX: + case ISD::UMIN: + case ISD::SMIN: ExpandIntRes_MINMAX(N, Lo, Hi); break; + + case ISD::ADD: + case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break; + + case ISD::ADDC: + case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break; + + case ISD::ADDE: + case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break; + + case ISD::ADDCARRY: + case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break; + + case ISD::SADDO: + case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; + case ISD::UADDO: + case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; + case ISD::UMULO: + case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break; + + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::SSUBSAT: + case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break; + + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break; + + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: ExpandIntRes_VECREDUCE(N, Lo, Hi); break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. + if (Lo.getNode()) + SetExpandedInteger(SDValue(N, ResNo), Lo, Hi); +} + +/// Lower an atomic node to the appropriate builtin call. +std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { + unsigned Opc = Node->getOpcode(); + MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); + + return ExpandChainLibCall(LC, Node, false); +} + +/// N is a shift by a value that needs to be expanded, +/// and the shift amount is a constant 'Amt'. Expand the operation. 
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
+                                             SDValue &Lo, SDValue &Hi) {
+  SDLoc DL(N);
+  // Expand the incoming operand to be shifted, so that we have its parts.
+  SDValue InL, InH;
+  GetExpandedInteger(N->getOperand(0), InL, InH);
+
+  // Though Amt shouldn't usually be 0, it's possible. E.g. when legalization
+  // split a vector shift, like this: <op1, op2> SHL <0, 2>.
+  if (!Amt) {
+    Lo = InL;
+    Hi = InH;
+    return;
+  }
+
+  EVT NVT = InL.getValueType();
+  unsigned VTBits = N->getValueType(0).getSizeInBits();
+  unsigned NVTBits = NVT.getSizeInBits();
+  EVT ShTy = N->getOperand(1).getValueType();
+
+  if (N->getOpcode() == ISD::SHL) {
+    if (Amt.ugt(VTBits)) {
+      Lo = Hi = DAG.getConstant(0, DL, NVT);
+    } else if (Amt.ugt(NVTBits)) {
+      Lo = DAG.getConstant(0, DL, NVT);
+      Hi = DAG.getNode(ISD::SHL, DL,
+                       NVT, InL, DAG.getConstant(Amt - NVTBits, DL, ShTy));
+    } else if (Amt == NVTBits) {
+      Lo = DAG.getConstant(0, DL, NVT);
+      Hi = InL;
+    } else {
+      Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy));
+      Hi = DAG.getNode(ISD::OR, DL, NVT,
+                       DAG.getNode(ISD::SHL, DL, NVT, InH,
+                                   DAG.getConstant(Amt, DL, ShTy)),
+                       DAG.getNode(ISD::SRL, DL, NVT, InL,
+                                   DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
+    }
+    return;
+  }
+
+  if (N->getOpcode() == ISD::SRL) {
+    if (Amt.ugt(VTBits)) {
+      Lo = Hi = DAG.getConstant(0, DL, NVT);
+    } else if (Amt.ugt(NVTBits)) {
+      Lo = DAG.getNode(ISD::SRL, DL,
+                       NVT, InH, DAG.getConstant(Amt - NVTBits, DL, ShTy));
+      Hi = DAG.getConstant(0, DL, NVT);
+    } else if (Amt == NVTBits) {
+      Lo = InH;
+      Hi = DAG.getConstant(0, DL, NVT);
+    } else {
+      Lo = DAG.getNode(ISD::OR, DL, NVT,
+                       DAG.getNode(ISD::SRL, DL, NVT, InL,
+                                   DAG.getConstant(Amt, DL, ShTy)),
+                       DAG.getNode(ISD::SHL, DL, NVT, InH,
+                                   DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
+      Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
+    }
+    return;
+  }
+
+  assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+  if (Amt.ugt(VTBits)) {
+    Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
+                          DAG.getConstant(NVTBits - 1, DL, ShTy));
+  } else if (Amt.ugt(NVTBits)) {
+    Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
+                     DAG.getConstant(Amt - NVTBits, DL, ShTy));
+    Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+                     DAG.getConstant(NVTBits - 1, DL, ShTy));
+  } else if (Amt == NVTBits) {
+    Lo = InH;
+    Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+                     DAG.getConstant(NVTBits - 1, DL, ShTy));
+  } else {
+    Lo = DAG.getNode(ISD::OR, DL, NVT,
+                     DAG.getNode(ISD::SRL, DL, NVT, InL,
+                                 DAG.getConstant(Amt, DL, ShTy)),
+                     DAG.getNode(ISD::SHL, DL, NVT, InH,
+                                 DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
+    Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
+  }
+}
+
+/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify
+/// this shift based on knowledge of the high bit of the shift amount. If we
+/// can tell this, we know that it is >= 32 or < 32, without knowing the actual
+/// shift amount.
+bool DAGTypeLegalizer:: +ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ShTy = Amt.getValueType(); + unsigned ShBits = ShTy.getScalarSizeInBits(); + unsigned NVTBits = NVT.getScalarSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + SDLoc dl(N); + + APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); + KnownBits Known = DAG.computeKnownBits(N->getOperand(1)); + + // If we don't know anything about the high bits, exit. + if (((Known.Zero|Known.One) & HighBitMask) == 0) + return false; + + // Get the incoming operand to be shifted. + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + // If we know that any of the high bits of the shift amount are one, then we + // can do this as a couple of simple shifts. + if (Known.One.intersects(HighBitMask)) { + // Mask out the high bit, which we know is set. + Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, + DAG.getConstant(~HighBitMask, dl, ShTy)); + + switch (N->getOpcode()) { + default: llvm_unreachable("Unknown shift"); + case ISD::SHL: + Lo = DAG.getConstant(0, dl, NVT); // Low part is zero. + Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. + return true; + case ISD::SRL: + Hi = DAG.getConstant(0, dl, NVT); // Hi part is zero. + Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + case ISD::SRA: + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. + DAG.getConstant(NVTBits - 1, dl, ShTy)); + Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + } + } + + // If we know that all of the high bits of the shift amount are zero, then we + // can do this as a couple of simple shifts. + if (HighBitMask.isSubsetOf(Known.Zero)) { + // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined + // shift if x is zero. We can use XOR here because x is known to be smaller + // than 32. + SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt, + DAG.getConstant(NVTBits - 1, dl, ShTy)); + + unsigned Op1, Op2; + switch (N->getOpcode()) { + default: llvm_unreachable("Unknown shift"); + case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break; + case ISD::SRL: + case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; + } + + // When shifting right the arithmetic for Lo and Hi is swapped. + if (N->getOpcode() != ISD::SHL) + std::swap(InL, InH); + + // Use a little trick to get the bits that move from Lo to Hi. First + // shift by one bit. + SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, dl, ShTy)); + // Then compute the remaining shift with amount-1. + SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2); + + Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt); + Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2); + + if (N->getOpcode() != ISD::SHL) + std::swap(Hi, Lo); + return true; + } + + return false; +} + +/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift +/// of any size. 
+bool DAGTypeLegalizer:: +ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ShTy = Amt.getValueType(); + unsigned NVTBits = NVT.getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + SDLoc dl(N); + + // Get the incoming operand to be shifted. + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + SDValue NVBitsNode = DAG.getConstant(NVTBits, dl, ShTy); + SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode); + SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); + SDValue isShort = DAG.getSetCC(dl, getSetCCResultType(ShTy), + Amt, NVBitsNode, ISD::SETULT); + SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(ShTy), + Amt, DAG.getConstant(0, dl, ShTy), + ISD::SETEQ); + + SDValue LoS, HiS, LoL, HiL; + switch (N->getOpcode()) { + default: llvm_unreachable("Unknown shift"); + case ISD::SHL: + // Short: ShAmt < NVTBits + LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); + HiS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt), + DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack)); + + // Long: ShAmt >= NVTBits + LoL = DAG.getConstant(0, dl, NVT); // Lo part is zero. + HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part. + + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isZero, InH, + DAG.getSelect(dl, NVT, isShort, HiS, HiL)); + return true; + case ISD::SRL: + // Short: ShAmt < NVTBits + HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); + LoS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); + + // Long: ShAmt >= NVTBits + HiL = DAG.getConstant(0, dl, NVT); // Hi part is zero. + LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part. + + Lo = DAG.getSelect(dl, NVT, isZero, InL, + DAG.getSelect(dl, NVT, isShort, LoS, LoL)); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); + return true; + case ISD::SRA: + // Short: ShAmt < NVTBits + HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); + LoS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); + + // Long: ShAmt >= NVTBits + HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part. + DAG.getConstant(NVTBits - 1, dl, ShTy)); + LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. + + Lo = DAG.getSelect(dl, NVT, isZero, InL, + DAG.getSelect(dl, NVT, isShort, LoS, LoL)); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); + return true; + } +} + +static std::pair<ISD::CondCode, ISD::NodeType> getExpandedMinMaxOps(int Op) { + + switch (Op) { + default: llvm_unreachable("invalid min/max opcode"); + case ISD::SMAX: + return std::make_pair(ISD::SETGT, ISD::UMAX); + case ISD::UMAX: + return std::make_pair(ISD::SETUGT, ISD::UMAX); + case ISD::SMIN: + return std::make_pair(ISD::SETLT, ISD::UMIN); + case ISD::UMIN: + return std::make_pair(ISD::SETULT, ISD::UMIN); + } +} + +void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc DL(N); + ISD::NodeType LoOpc; + ISD::CondCode CondC; + std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode()); + + // Expand the subcomponents. 
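+  // NOTE (editor): illustrative sketch of the full expansion below, not part
+  // of the original patch. In plain C, expanding a 64-bit SMAX into 32-bit
+  // halves looks like:
+  //   resHi = smax(lhsHi, rhsHi);                      // same op on Hi
+  //   bool hiWins = (int32_t)lhsHi > (int32_t)rhsHi;   // CondC
+  //   bool hiEq   = lhsHi == rhsHi;
+  //   uint32_t loCmp    = hiWins ? lhsLo : rhsLo;
+  //   uint32_t loMinMax = umax(lhsLo, rhsLo);          // LoOpc is unsigned
+  //   resLo = hiEq ? loMinMax : loCmp;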
+  SDValue LHSL, LHSH, RHSL, RHSH;
+  GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+  GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+
+  // Value types
+  EVT NVT = LHSL.getValueType();
+  EVT CCT = getSetCCResultType(NVT);
+
+  // The Hi part always uses the original min/max opcode.
+  Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
+
+  // We need to know whether to select the Lo part corresponding to the
+  // 'winning' Hi part, or to fall back to a Lo min/max when the Hi parts
+  // are equal.
+  SDValue IsHiLeft = DAG.getSetCC(DL, CCT, LHSH, RHSH, CondC);
+  SDValue IsHiEq = DAG.getSetCC(DL, CCT, LHSH, RHSH, ISD::SETEQ);
+
+  // Lo part corresponding to the 'winning' Hi part
+  SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL);
+
+  // Lo part to use when the Hi parts are equal; this always uses the unsigned
+  // version, since the Lo halves compare as unsigned values.
+  SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL});
+
+  Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  SDLoc dl(N);
+  // Expand the subcomponents.
+  SDValue LHSL, LHSH, RHSL, RHSH;
+  GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+  GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+
+  EVT NVT = LHSL.getValueType();
+  SDValue LoOps[2] = { LHSL, RHSL };
+  SDValue HiOps[3] = { LHSH, RHSH };
+
+  bool HasOpCarry = TLI.isOperationLegalOrCustom(
+      N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY,
+      TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+  if (HasOpCarry) {
+    SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
+    if (N->getOpcode() == ISD::ADD) {
+      Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps);
+    } else {
+      Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps);
+    }
+    return;
+  }
+
+  // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
+  // them. TODO: Teach operation legalization how to expand unsupported
+  // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
+  // a carry of type MVT::Glue, but there doesn't seem to be any way to
+  // generate a value of this type in the expanded code sequence.
+  bool hasCarry =
+    TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+                                   ISD::ADDC : ISD::SUBC,
+                                 TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+
+  if (hasCarry) {
+    SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
+    if (N->getOpcode() == ISD::ADD) {
+      Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps);
+    } else {
+      Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps);
+    }
+    return;
+  }
+
+  bool hasOVF =
+    TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+ ISD::UADDO : ISD::USUBO, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); + + if (hasOVF) { + EVT OvfVT = getSetCCResultType(NVT); + SDVTList VTList = DAG.getVTList(NVT, OvfVT); + int RevOpc; + if (N->getOpcode() == ISD::ADD) { + RevOpc = ISD::SUB; + Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); + Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); + } else { + RevOpc = ISD::ADD; + Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); + Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2)); + } + SDValue OVF = Lo.getValue(1); + + switch (BoolType) { + case TargetLoweringBase::UndefinedBooleanContent: + OVF = DAG.getNode(ISD::AND, dl, OvfVT, DAG.getConstant(1, dl, OvfVT), OVF); + LLVM_FALLTHROUGH; + case TargetLoweringBase::ZeroOrOneBooleanContent: + OVF = DAG.getZExtOrTrunc(OVF, dl, NVT); + Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF); + break; + case TargetLoweringBase::ZeroOrNegativeOneBooleanContent: + OVF = DAG.getSExtOrTrunc(OVF, dl, NVT); + Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF); + } + return; + } + + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps); + Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); + SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], + ISD::SETULT); + + if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) { + SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); + return; + } + + SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, + DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); + SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1], + ISD::SETULT); + SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2, + DAG.getConstant(1, dl, NVT), Carry1); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); + } else { + Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps); + Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2)); + SDValue Cmp = + DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), + LoOps[0], LoOps[1], ISD::SETULT); + + SDValue Borrow; + if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) + Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT); + else + Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); + + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); + } +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + if (N->getOpcode() == ISD::ADDC) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps); + } + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. 
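+  // NOTE (editor): illustrative sketch, not part of the original patch.
+  // ADDE/SUBE simply chain the glue carry through both halves; for a 64-bit
+  // ADDE built from 32-bit parts the arithmetic is:
+  //   uint32_t lo = lhsLo + rhsLo + carryIn;
+  //   bool     c  = lo < lhsLo || (carryIn && lo == lhsLo);  // carry out
+  //   uint32_t hi = lhsHi + rhsHi + c;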
+  SDValue LHSL, LHSH, RHSL, RHSH;
+  SDLoc dl(N);
+  GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+  GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+  SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
+  SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+  SDValue HiOps[3] = { LHSH, RHSH };
+
+  Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
+  HiOps[2] = Lo.getValue(1);
+  Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps);
+
+  // Legalized the flag result - switch anything that used the old flag to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  SDLoc dl(N);
+
+  SDValue Ovf;
+
+  bool HasOpCarry = TLI.isOperationLegalOrCustom(
+      N->getOpcode() == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY,
+      TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
+
+  if (HasOpCarry) {
+    // Expand the subcomponents.
+    SDValue LHSL, LHSH, RHSL, RHSH;
+    GetExpandedInteger(LHS, LHSL, LHSH);
+    GetExpandedInteger(RHS, RHSL, RHSH);
+    SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
+    SDValue LoOps[2] = { LHSL, RHSL };
+    SDValue HiOps[3] = { LHSH, RHSH };
+
+    unsigned Opc = N->getOpcode() == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
+    Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
+    HiOps[2] = Lo.getValue(1);
+    Hi = DAG.getNode(Opc, dl, VTList, HiOps);
+
+    Ovf = Hi.getValue(1);
+  } else {
+    // Expand the result by simply replacing it with the equivalent
+    // non-overflow-checking operation.
+    auto Opc = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
+    SDValue Sum = DAG.getNode(Opc, dl, LHS.getValueType(), LHS, RHS);
+    SplitInteger(Sum, Lo, Hi);
+
+    // Calculate the overflow: addition overflows iff a + b < a, and
+    // subtraction overflows iff a - b > a.
+    auto Cond = N->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT;
+    Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond);
+  }
+
+  // Legalized the flag result - switch anything that used the old flag to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Ovf);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBCARRY(SDNode *N,
+                                                SDValue &Lo, SDValue &Hi) {
+  // Expand the subcomponents.
+  SDValue LHSL, LHSH, RHSL, RHSH;
+  SDLoc dl(N);
+  GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+  GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+  SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
+  SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+  SDValue HiOps[3] = { LHSH, RHSH, SDValue() };
+
+  Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
+  HiOps[2] = Lo.getValue(1);
+  Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps);
+
+  // Legalized the flag result - switch anything that used the old flag to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
+                                               SDValue &Lo, SDValue &Hi) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDLoc dl(N);
+  SDValue Op = N->getOperand(0);
+  if (Op.getValueType().bitsLE(NVT)) {
+    // The low part is any extension of the input (which degenerates to a copy).
+    Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op);
+    Hi = DAG.getUNDEF(NVT);   // The high part is undefined.
+  } else {
+    // For example, extension of an i48 to an i64. The operand type necessarily
+    // promotes to the result type, so will end up being expanded too.
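+    // NOTE (editor): with 32-bit parts, ANY_EXTEND i48 -> i64 first promotes
+    // the i48 operand to i64; SplitInteger below then yields the two i32
+    // halves directly.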
+ assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. + SplitInteger(Res, Lo, Hi); + } +} + +void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + unsigned NVTBits = NVT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + if (NVTBits < EVTBits) { + Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + EVTBits - NVTBits))); + } else { + Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); + // The high part replicates the sign bit of Lo, make it explicit. + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(NVTBits - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + } +} + +void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + unsigned NVTBits = NVT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + if (NVTBits < EVTBits) { + Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + EVTBits - NVTBits))); + } else { + Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT)); + // The high part must be zero, make it explicit. + Hi = DAG.getConstant(0, dl, NVT); + } +} + +void DAGTypeLegalizer::ExpandIntRes_BITREVERSE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands. + Lo = DAG.getNode(ISD::BITREVERSE, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::BITREVERSE, dl, Hi.getValueType(), Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands. + Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned NBitWidth = NVT.getSizeInBits(); + auto Constant = cast<ConstantSDNode>(N); + const APInt &Cst = Constant->getAPIntValue(); + bool IsTarget = Constant->isTargetOpcode(); + bool IsOpaque = Constant->isOpaque(); + SDLoc dl(N); + Lo = DAG.getConstant(Cst.trunc(NBitWidth), dl, NVT, IsTarget, IsOpaque); + Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), dl, NVT, IsTarget, + IsOpaque); +} + +void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + + // abs(HiLo) -> (Hi < 0 ? 
-HiLo : HiLo) + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, + DAG.getConstant(0, dl, VT), N0); + SDValue NegLo, NegHi; + SplitInteger(Neg, NegLo, NegHi); + + GetExpandedInteger(N0, Lo, Hi); + EVT NVT = Lo.getValueType(); + SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT), + DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT); + Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo); + Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + // ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+32) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + + SDValue HiNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi, + DAG.getConstant(0, dl, NVT), ISD::SETNE); + + SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi); + + Lo = DAG.getSelect(dl, NVT, HiNotZero, HiLZ, + DAG.getNode(ISD::ADD, dl, NVT, LoLZ, + DAG.getConstant(NVT.getSizeInBits(), dl, + NVT))); + Hi = DAG.getConstant(0, dl, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo), + DAG.getNode(ISD::CTPOP, dl, NVT, Hi)); + Hi = DAG.getConstant(0, dl, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + + SDValue LoNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, + DAG.getConstant(0, dl, NVT), ISD::SETNE); + + SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi); + + Lo = DAG.getSelect(dl, NVT, LoNotZero, LoLZ, + DAG.getNode(ISD::ADD, dl, NVT, HiLZ, + DAG.getConstant(NVT.getSizeInBits(), dl, + NVT))); + Hi = DAG.getConstant(0, dl, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned NBitWidth = NVT.getSizeInBits(); + + EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); + Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, NVT); + // The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy)); +} + +void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + EVT VT = N->getValueType(0); + + SDValue Op = N->getOperand(0); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) + Op = GetPromotedFloat(Op); + + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + EVT VT = N->getValueType(0); + + SDValue Op = N->getOperand(0); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) + Op = 
GetPromotedFloat(Op); + + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); + TargetLowering::MakeLibCallOptions CallOptions; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo, + SDValue &Hi) { + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + if (VT == MVT::f32) + LC = RTLIB::LLROUND_F32; + else if (VT == MVT::f64) + LC = RTLIB::LLROUND_F64; + else if (VT == MVT::f80) + LC = RTLIB::LLROUND_F80; + else if (VT == MVT::f128) + LC = RTLIB::LLROUND_F128; + else if (VT == MVT::ppcf128) + LC = RTLIB::LLROUND_PPCF128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!"); + + SDValue Op = N->getOperand(0); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) + Op = GetPromotedFloat(Op); + + SDLoc dl(N); + EVT RetVT = N->getValueType(0); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + if (VT == MVT::f32) + LC = RTLIB::LLRINT_F32; + else if (VT == MVT::f64) + LC = RTLIB::LLRINT_F64; + else if (VT == MVT::f80) + LC = RTLIB::LLRINT_F80; + else if (VT == MVT::f128) + LC = RTLIB::LLRINT_F128; + else if (VT == MVT::ppcf128) + LC = RTLIB::LLRINT_PPCF128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!"); + + SDValue Op = N->getOperand(0); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) + Op = GetPromotedFloat(Op); + + SDLoc dl(N); + EVT RetVT = N->getValueType(0); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, + SDValue &Lo, SDValue &Hi) { + if (ISD::isNormalLoad(N)) { + ExpandRes_NormalLoad(N, Lo, Hi); + return; + } + + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + ISD::LoadExtType ExtType = N->getExtensionType(); + unsigned Alignment = N->getAlignment(); + MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); + AAMDNodes AAInfo = N->getAAInfo(); + SDLoc dl(N); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + if (N->getMemoryVT().bitsLE(NVT)) { + EVT MemVT = N->getMemoryVT(); + + Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT, + Alignment, MMOFlags, AAInfo); + + // Remember the chain. + Ch = Lo.getValue(1); + + if (ExtType == ISD::SEXTLOAD) { + // The high part is obtained by SRA'ing all but one of the bits of the + // lo part. + unsigned LoSize = Lo.getValueSizeInBits(); + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + } else if (ExtType == ISD::ZEXTLOAD) { + // The high part is just a zero. + Hi = DAG.getConstant(0, dl, NVT); + } else { + assert(ExtType == ISD::EXTLOAD && "Unknown extload!"); + // The high part is undefined. 
+ Hi = DAG.getUNDEF(NVT); + } + } else if (DAG.getDataLayout().isLittleEndian()) { + // Little-endian - low bits are at low addresses. + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), Alignment, MMOFlags, + AAInfo); + + unsigned ExcessBits = + N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); + EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), NEVT, + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + } else { + // Big-endian - high bits are at low addresses. Favor aligned loads at + // the cost of some bit-fiddling. + EVT MemVT = N->getMemoryVT(); + unsigned EBytes = MemVT.getStoreSize(); + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + + // Load both the high bits and maybe some of the low bits. + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), + EVT::getIntegerVT(*DAG.getContext(), + MemVT.getSizeInBits() - ExcessBits), + Alignment, MMOFlags, AAInfo); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + // Load the rest of the low bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + EVT::getIntegerVT(*DAG.getContext(), ExcessBits), + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer low bits from the bottom of Hi to the top of Lo. + Lo = DAG.getNode( + ISD::OR, dl, NVT, Lo, + DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(ExcessBits, dl, + TLI.getPointerTy(DAG.getDataLayout())))); + // Move high bits to the right position in Hi. + Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, NVT, + Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + } + } + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
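+  // NOTE (editor): in the two split-load paths above, Ch is the TokenFactor
+  // of both half-loads, so chained users now depend on both halves; in the
+  // single-load path it is simply the chain of Lo.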
+ ReplaceValueWith(SDValue(N, 1), Ch); +} + +void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL); + Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH); +} + +void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDLoc dl(N); + + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + + if (TLI.expandMUL(N, Lo, Hi, NVT, DAG, + TargetLowering::MulExpansionKind::OnlyLegalOrCustom, + LL, LH, RL, RH)) + return; + + // If nothing else, we can make a libcall. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::MUL_I16; + else if (VT == MVT::i32) + LC = RTLIB::MUL_I32; + else if (VT == MVT::i64) + LC = RTLIB::MUL_I64; + else if (VT == MVT::i128) + LC = RTLIB::MUL_I128; + + if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { + // We'll expand the multiplication by brute force because we have no other + // options. This is a trivially-generalized version of the code from + // Hacker's Delight (itself derived from Knuth's Algorithm M from section + // 4.3.1). + unsigned Bits = NVT.getSizeInBits(); + unsigned HalfBits = Bits >> 1; + SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, + NVT); + SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask); + SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask); + + SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL); + SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask); + + EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); + if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) { + // The type from TLI is too small to fit the shift amount we want. + // Override it with i32. The shift will have to be legalized. 
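+      // NOTE (editor): hypothetical example. Expanding an i1024 multiply this
+      // way would give NVT = i512 and HalfBits = 256, which cannot be encoded
+      // in a target shift-amount type as narrow as i8 (max value 255).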
+ ShiftAmtTy = MVT::i32; + } + SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy); + SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift); + SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift); + SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift); + + SDValue U = DAG.getNode(ISD::ADD, dl, NVT, + DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH); + SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask); + SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift); + + SDValue V = DAG.getNode(ISD::ADD, dl, NVT, + DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL); + SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift); + + SDValue W = DAG.getNode(ISD::ADD, dl, NVT, + DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH), + DAG.getNode(ISD::ADD, dl, NVT, UH, VH)); + Lo = DAG.getNode(ISD::ADD, dl, NVT, TL, + DAG.getNode(ISD::SHL, dl, NVT, V, Shift)); + + Hi = DAG.getNode(ISD::ADD, dl, NVT, W, + DAG.getNode(ISD::ADD, dl, NVT, + DAG.getNode(ISD::MUL, dl, NVT, RH, LL), + DAG.getNode(ISD::MUL, dl, NVT, RL, LH))); + return; + } + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc DL(N); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other); + SDValue R = DAG.getNode(N->getOpcode(), DL, VTs, N->getOperand(0)); + Lo = R.getValue(0); + Hi = R.getValue(1); + ReplaceValueWith(SDValue(N, 1), R.getValue(2)); +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Result = TLI.expandAddSubSat(N, DAG); + SplitInteger(Result, Lo, Hi); +} + +/// This performs an expansion of the integer result for a fixed point +/// multiplication. The default expansion performs rounding down towards +/// negative infinity, though targets that do care about rounding should specify +/// a target hook for rounding and provide their own expansion or lowering of +/// fixed point multiplication to be consistent with rounding. +void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + EVT VT = N->getValueType(0); + unsigned VTSize = VT.getScalarSizeInBits(); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + uint64_t Scale = N->getConstantOperandVal(2); + bool Saturating = (N->getOpcode() == ISD::SMULFIXSAT || + N->getOpcode() == ISD::UMULFIXSAT); + bool Signed = (N->getOpcode() == ISD::SMULFIX || + N->getOpcode() == ISD::SMULFIXSAT); + + // Handle special case when scale is equal to zero. + if (!Scale) { + SDValue Result; + if (!Saturating) { + Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); + } else { + EVT BoolVT = getSetCCResultType(VT); + unsigned MulOp = Signed ? 
                               ISD::SMULO : ISD::UMULO;
+      Result = DAG.getNode(MulOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+      SDValue Product = Result.getValue(0);
+      SDValue Overflow = Result.getValue(1);
+      if (Signed) {
+        APInt MinVal = APInt::getSignedMinValue(VTSize);
+        APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+        SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+        SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+        SDValue Zero = DAG.getConstant(0, dl, VT);
+        SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
+        Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+        Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
+      } else {
+        // For unsigned multiplication, we only need to check the max since we
+        // can't really overflow towards zero.
+        APInt MaxVal = APInt::getMaxValue(VTSize);
+        SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+        Result = DAG.getSelect(dl, VT, Overflow, SatMax, Product);
+      }
+    }
+    SplitInteger(Result, Lo, Hi);
+    return;
+  }
+
+  // For SMULFIX[SAT] we only expect to find Scale<VTSize, but this assert will
+  // cover for unhandled cases below, while still being valid for UMULFIX[SAT].
+  assert(Scale <= VTSize && "Scale can't be larger than the value type size.");
+
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue LL, LH, RL, RH;
+  GetExpandedInteger(LHS, LL, LH);
+  GetExpandedInteger(RHS, RL, RH);
+  SmallVector<SDValue, 4> Result;
+
+  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+  if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG,
+                          TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
+                          LL, LH, RL, RH)) {
+    report_fatal_error("Unable to expand MUL_FIX using MUL_LOHI.");
+    return;
+  }
+
+  unsigned NVTSize = NVT.getScalarSizeInBits();
+  assert((VTSize == NVTSize * 2) && "Expected the new value type to be half "
+                                    "the size of the current value type");
+  EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
+
+  // After getting the multiplication result in 4 parts, we need to perform a
+  // shift right by the amount of the scale to get the result in that scale.
+  //
+  // Let's say we multiply two 64-bit numbers. The resulting value can be held
+  // in 128 bits that are cut into 4 32-bit parts:
+  //
+  //        HH       HL       LH       LL
+  //    |---32---|---32---|---32---|---32---|
+  //  128       96       64       32        0
+  //
+  //                      |------VTSize-----|
+  //
+  //                               |NVTSize-|
+  //
+  // The resulting Lo and Hi would normally be in LL and LH after the shift.
+  // But to avoid unnecessary shifting of all 4 parts, we can adjust the shift
+  // amount and get Lo and Hi using two funnel shifts. Or for the special case
+  // when Scale is a multiple of NVTSize we can just pick the result without
+  // shifting.
+  uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed.
+  if (Scale % NVTSize) {
+    SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy);
+    Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0],
+                     ShiftAmount);
+    Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1],
+                     ShiftAmount);
+  } else {
+    Lo = Result[Part0];
+    Hi = Result[Part0 + 1];
+  }
+
+  // Unless saturation is requested we are done. The result is in <Hi,Lo>.
+  if (!Saturating)
+    return;
+
+  // Cannot overflow when there is no integer part.
+  if (Scale == VTSize)
+    return;
+
+  // To handle saturation we must check for overflow in the multiplication.
+  //
+  // Unsigned overflow happened if the upper (VTSize - Scale) bits (of Result)
+  // aren't all zeroes.
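+  // (Editor's example: with VTSize = 64 and Scale = 32 the result occupies
+  // bits [32,96) of the 128-bit product, so unsigned overflow occurs exactly
+  // when HH, bits [96,128), is nonzero -- the Scale == NVTSize case below.)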
+  //
+  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of Result)
+  // aren't all ones or all zeroes.
+  //
+  // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the
+  // highest bit of HH determines saturation direction in the event of signed
+  // saturation.
+
+  SDValue ResultHL = Result[2];
+  SDValue ResultHH = Result[3];
+
+  SDValue SatMax, SatMin;
+  SDValue NVTZero = DAG.getConstant(0, dl, NVT);
+  SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT);
+  EVT BoolNVT = getSetCCResultType(NVT);
+
+  if (!Signed) {
+    if (Scale < NVTSize) {
+      // Overflow happened if ((HH | (HL >> Scale)) != 0).
+      SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+                                       DAG.getConstant(Scale, dl, ShiftTy));
+      SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH);
+      SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE);
+    } else if (Scale == NVTSize) {
+      // Overflow happened if (HH != 0).
+      SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE);
+    } else if (Scale < VTSize) {
+      // Overflow happened if ((HH >> (Scale - NVTSize)) != 0).
+      SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+                                       DAG.getConstant(Scale - NVTSize, dl,
+                                                       ShiftTy));
+      SatMax = DAG.getSetCC(dl, BoolNVT, HLAdjusted, NVTZero, ISD::SETNE);
+    } else
+      llvm_unreachable("Scale must be less than or equal to VTSize for "
+                       "UMULFIXSAT (and saturation can't happen with "
+                       "Scale==VTSize).");
+
+    Hi = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Hi);
+    Lo = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Lo);
+    return;
+  }
+
+  if (Scale < NVTSize) {
+    // The number of overflow bits we can check are VTSize - Scale + 1 (we
+    // include the sign bit). If these top bits are > 0, then we overflowed past
+    // the max value. If these top bits are < -1, then we overflowed past the
+    // min value. Otherwise, we did not overflow.
+    unsigned OverflowBits = VTSize - Scale + 1;
+    assert(OverflowBits <= VTSize && OverflowBits > NVTSize &&
+           "Extent of overflow bits must start within HL");
+    SDValue HLHiMask = DAG.getConstant(
+        APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT);
+    SDValue HLLoMask = DAG.getConstant(
+        APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT);
+    // We overflow max if HH > 0 or (HH == 0 && HL > HLLoMask).
+    SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+    SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+    SDValue HLUGT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT);
+    SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0,
+                         DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLUGT));
+    // We overflow min if HH < -1 or (HH == -1 && HL < HLHiMask).
+    SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+    SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+    SDValue HLULT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT);
+    SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT,
+                         DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLULT));
+  } else if (Scale == NVTSize) {
+    // We overflow max if HH > 0 or (HH == 0 && HL sign bit is 1).
+    SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+    SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+    SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT);
+    SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0,
+                         DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLNeg));
+    // We overflow min if HH < -1 or (HH == -1 && HL sign bit is 0).
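+    // (Editor's example: with VTSize = 64 and Scale = 32, the product
+    // (-2^48) * 2^48 = -2^96 has HH == -1, HL == 0 with a clear sign bit, so
+    // the result saturates to the signed minimum.)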
+ SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); + SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); + SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE); + SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLPos)); + } else if (Scale < VTSize) { + // This is similar to the case when we saturate if Scale < NVTSize, but we + // only need to check HH. + unsigned OverflowBits = VTSize - Scale + 1; + SDValue HHHiMask = DAG.getConstant( + APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT); + SDValue HHLoMask = DAG.getConstant( + APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT); + SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT); + SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT); + } else + llvm_unreachable("Illegal scale for signed fixed point mul."); + + // Saturate to signed maximum. + APInt MaxHi = APInt::getSignedMaxValue(NVTSize); + APInt MaxLo = APInt::getAllOnesValue(NVTSize); + Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi); + Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo); + // Saturate to signed minimum. + APInt MinHi = APInt::getSignedMinValue(NVTSize); + Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(MinHi, dl, NVT), Hi); + Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo); +} + +void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDLoc dl(Node); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Compute the overflow. + // + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + EVT OType = Node->getValueType(1); + SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); + + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? + ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + + // Use the calculated overflow everywhere. 
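+  // (Editor's example: for an i64 SADDO expanded on a 32-bit target,
+  // INT64_MAX + 1 has two non-negative operands but a negative Sum, so
+  // SignsMatch and SumSignNE are both true and Cmp signals the overflow.)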
+ ReplaceValueWith(SDValue(Node, 1), Cmp); +} + +void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(0), Lo, Hi); + return; + } + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::SDIV_I16; + else if (VT == MVT::i32) + LC = RTLIB::SDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::SDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::SDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); + + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + SDLoc dl(N); + + // If we can emit an efficient shift operation, do so now. Check to see if + // the RHS is a constant. + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) + return ExpandShiftByConstant(N, CN->getAPIntValue(), Lo, Hi); + + // If we can determine that the high bit of the shift is zero or one, even if + // the low bits are variable, emit this shift in an optimized form. + if (ExpandShiftWithKnownAmountBit(N, Lo, Hi)) + return; + + // If this target supports shift_PARTS, use it. First, map to the _PARTS opc. + unsigned PartsOpc; + if (N->getOpcode() == ISD::SHL) { + PartsOpc = ISD::SHL_PARTS; + } else if (N->getOpcode() == ISD::SRL) { + PartsOpc = ISD::SRL_PARTS; + } else { + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + PartsOpc = ISD::SRA_PARTS; + } + + // Next check to see if the target supports this SHL_PARTS operation or if it + // will custom expand it. Don't lower this to SHL_PARTS when we optimise for + // size, but create a libcall instead. + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT); + const bool LegalOrCustom = + (Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) || + Action == TargetLowering::Custom; + + if (LegalOrCustom && TLI.shouldExpandShift(DAG, N)) { + // Expand the subcomponents. + SDValue LHSL, LHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + EVT VT = LHSL.getValueType(); + + // If the shift amount operand is coming from a vector legalization it may + // have an illegal type. Fix that first by casting the operand, otherwise + // the new SHL_PARTS operation would need further legalization. + SDValue ShiftOp = N->getOperand(1); + EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + assert(ShiftTy.getScalarSizeInBits() >= + Log2_32_Ceil(VT.getScalarSizeInBits()) && + "ShiftAmountTy is too small to cover the range of this type!"); + if (ShiftOp.getValueType() != ShiftTy) + ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); + + SDValue Ops[] = { LHSL, LHSH, ShiftOp }; + Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops); + Hi = Lo.getValue(1); + return; + } + + // Otherwise, emit a libcall. 
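+  // NOTE (editor): on most targets these map to the libgcc/compiler-rt shift
+  // helpers, e.g. RTLIB::SHL_I64 -> __ashldi3, RTLIB::SRL_I64 -> __lshrdi3
+  // and RTLIB::SRA_I64 -> __ashrdi3.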
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  bool isSigned;
+  if (N->getOpcode() == ISD::SHL) {
+    isSigned = false; /*sign irrelevant*/
+    if (VT == MVT::i16)
+      LC = RTLIB::SHL_I16;
+    else if (VT == MVT::i32)
+      LC = RTLIB::SHL_I32;
+    else if (VT == MVT::i64)
+      LC = RTLIB::SHL_I64;
+    else if (VT == MVT::i128)
+      LC = RTLIB::SHL_I128;
+  } else if (N->getOpcode() == ISD::SRL) {
+    isSigned = false;
+    if (VT == MVT::i16)
+      LC = RTLIB::SRL_I16;
+    else if (VT == MVT::i32)
+      LC = RTLIB::SRL_I32;
+    else if (VT == MVT::i64)
+      LC = RTLIB::SRL_I64;
+    else if (VT == MVT::i128)
+      LC = RTLIB::SRL_I128;
+  } else {
+    assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+    isSigned = true;
+    if (VT == MVT::i16)
+      LC = RTLIB::SRA_I16;
+    else if (VT == MVT::i32)
+      LC = RTLIB::SRA_I32;
+    else if (VT == MVT::i64)
+      LC = RTLIB::SRA_I64;
+    else if (VT == MVT::i128)
+      LC = RTLIB::SRA_I128;
+  }
+
+  if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
+    SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+    TargetLowering::MakeLibCallOptions CallOptions;
+    CallOptions.setSExt(isSigned);
+    SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
+    return;
+  }
+
+  if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
+    llvm_unreachable("Unsupported shift!");
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
+                                                SDValue &Lo, SDValue &Hi) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDLoc dl(N);
+  SDValue Op = N->getOperand(0);
+  if (Op.getValueType().bitsLE(NVT)) {
+    // The low part is sign extension of the input (degenerates to a copy).
+    Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
+    // The high part is obtained by SRA'ing all but one of the bits of the
+    // low part.
+    unsigned LoSize = NVT.getSizeInBits();
+    Hi = DAG.getNode(
+        ISD::SRA, dl, NVT, Lo,
+        DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout())));
+  } else {
+    // For example, extension of an i48 to an i64. The operand type necessarily
+    // promotes to the result type, so will end up being expanded too.
+    assert(getTypeAction(Op.getValueType()) ==
+           TargetLowering::TypePromoteInteger &&
+           "Only know how to promote this result!");
+    SDValue Res = GetPromotedInteger(Op);
+    assert(Res.getValueType() == N->getValueType(0) &&
+           "Operand over promoted?");
+    // Split the promoted operand. This will simplify when it is expanded.
+    SplitInteger(Res, Lo, Hi);
+    unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits();
+    Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+                     DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+                                                        ExcessBits)));
+  }
+}
+
+void DAGTypeLegalizer::
+ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  SDLoc dl(N);
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+  if (EVT.bitsLE(Lo.getValueType())) {
+    // sext_inreg the low part if needed.
+    Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo,
+                     N->getOperand(1));
+
+    // The high part gets the sign extension from the lo-part. This handles
+    // things like sextinreg V:i64 from i8.
+    Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
+                     DAG.getConstant(Hi.getValueSizeInBits() - 1, dl,
+                                     TLI.getPointerTy(DAG.getDataLayout())));
+  } else {
+    // For example, extension of an i48 to an i64. Leave the low part alone,
+    // sext_inreg the high part.
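+    // With i32 parts, an i48 sext_inreg leaves the low word untouched; only
+    // the low 48 - 32 = 16 bits of the high word lie inside the i48, so the
+    // high part is sign extended in-register from i16 below.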
+ unsigned ExcessBits = EVT.getSizeInBits() - Lo.getValueSizeInBits(); + Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + ExcessBits))); + } +} + +void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(1), Lo, Hi); + return; + } + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::SREM_I16; + else if (VT == MVT::i32) + LC = RTLIB::SREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::SREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::SREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); + + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); + Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(), + N->getOperand(0), + DAG.getConstant(NVT.getSizeInBits(), dl, + TLI.getPointerTy(DAG.getDataLayout()))); + Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + SDLoc dl(N); + + if (N->getOpcode() == ISD::UMULO) { + // This section expands the operation into the following sequence of + // instructions. `iNh` here refers to a type which has half the bit width of + // the type the original operation operated on. 
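+    // (For a 64-bit UMULO, iN is i64 and iNh is i32.)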
+    //
+    // %0 = %LHS.HI != 0 && %RHS.HI != 0
+    // %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO)
+    // %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO)
+    // %3 = mul nuw iN (%LHS.LO as iN), (%RHS.LO as iN)
+    // %4 = add iN (%1.0 as iN) << Nh, (%2.0 as iN) << Nh
+    // %5 = { iN, i1 } @uadd.with.overflow.iN( %4, %3 )
+    //
+    // %res = { %5.0, %0 || %1.1 || %2.1 || %5.1 }
+    SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+    SDValue LHSHigh, LHSLow, RHSHigh, RHSLow;
+    SplitInteger(LHS, LHSLow, LHSHigh);
+    SplitInteger(RHS, RHSLow, RHSHigh);
+    EVT HalfVT = LHSLow.getValueType();
+    EVT BitVT = N->getValueType(1);
+    SDVTList VTHalfMulO = DAG.getVTList(HalfVT, BitVT);
+    SDVTList VTFullAddO = DAG.getVTList(VT, BitVT);
+
+    SDValue HalfZero = DAG.getConstant(0, dl, HalfVT);
+    SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT,
+      DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE),
+      DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE));
+
+    SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, LHSHigh, RHSLow);
+    Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1));
+    SDValue OneInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
+                                    One.getValue(0));
+
+    SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, RHSHigh, LHSLow);
+    Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1));
+    SDValue TwoInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
+                                    Two.getValue(0));
+
+    // Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not
+    // know how to expand `i64,i64 = umul_lohi a, b` and abort (why isn't this
+    // operation recursively legalized?).
+    //
+    // Many backends understand this pattern and will convert into LOHI
+    // themselves, if applicable.
+    SDValue Three = DAG.getNode(ISD::MUL, dl, VT,
+      DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow),
+      DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow));
+    SDValue Four = DAG.getNode(ISD::ADD, dl, VT, OneInHigh, TwoInHigh);
+    SDValue Five = DAG.getNode(ISD::UADDO, dl, VTFullAddO, Three, Four);
+    Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Five.getValue(1));
+    SplitInteger(Five, Lo, Hi);
+    ReplaceValueWith(SDValue(N, 1), Overflow);
+    return;
+  }
+
+  Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+  EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+  Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+
+  // Replace this with a libcall that will check overflow.
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  if (VT == MVT::i32)
+    LC = RTLIB::MULO_I32;
+  else if (VT == MVT::i64)
+    LC = RTLIB::MULO_I64;
+  else if (VT == MVT::i128)
+    LC = RTLIB::MULO_I128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
+
+  SDValue Temp = DAG.CreateStackTemporary(PtrVT);
+  // Temporary for the overflow value, default it to zero.
+  SDValue Chain =
+      DAG.getStore(DAG.getEntryNode(), dl, DAG.getConstant(0, dl, PtrVT), Temp,
+                   MachinePointerInfo());
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (const SDValue &Op : N->op_values()) {
+    EVT ArgVT = Op.getValueType();
+    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+    Entry.Node = Op;
+    Entry.Ty = ArgTy;
+    Entry.IsSExt = true;
+    Entry.IsZExt = false;
+    Args.push_back(Entry);
+  }
+
+  // Also pass the address of the overflow check.
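+  // (compiler-rt's __mulo* helpers take a pointer to an int flag and store a
+  // nonzero value through it when the multiplication overflows; the slot was
+  // zero-initialized above.)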
+ Entry.Node = Temp; + Entry.Ty = PtrTy->getPointerTo(); + Entry.IsSExt = true; + Entry.IsZExt = false; + Args.push_back(Entry); + + SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args)) + .setSExtResult(); + + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + SplitInteger(CallInfo.first, Lo, Hi); + SDValue Temp2 = + DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, MachinePointerInfo()); + SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, + DAG.getConstant(0, dl, PtrVT), + ISD::SETNE); + // Use the overflow from the libcall everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); +} + +void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(0), Lo, Hi); + return; + } + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::UDIV_I16; + else if (VT == MVT::i32) + LC = RTLIB::UDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::UDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::UDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); + + TargetLowering::MakeLibCallOptions CallOptions; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(1), Lo, Hi); + return; + } + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::UREM_I16; + else if (VT == MVT::i32) + LC = RTLIB::UREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::UREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::UREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); + + TargetLowering::MakeLibCallOptions CallOptions; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is zero extension of the input (degenerates to a copy). + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); + Hi = DAG.getConstant(0, dl, NVT); // The high part is just a zero. + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. 
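+    // E.g. zero extending i48 to i64 with i32 parts: Lo is bits 0-31, and
+    // only bits 32-47 land in Hi, so Hi is zero extended in-register from
+    // its low 16 bits below.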
+ SplitInteger(Res, Lo, Hi); + unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits(); + Hi = DAG.getZeroExtendInReg(Hi, dl, + EVT::getIntegerVT(*DAG.getContext(), + ExcessBits)); + } +} + +void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + EVT VT = cast<AtomicSDNode>(N)->getMemoryVT(); + SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue Swap = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, + cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0), + N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand()); + + ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); + ReplaceValueWith(SDValue(N, 1), Swap.getValue(2)); +} + +void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // TODO For VECREDUCE_(AND|OR|XOR) we could split the vector and calculate + // both halves independently. + SDValue Res = TLI.expandVecReduce(N, DAG); + SplitInteger(Res, Lo, Hi); +} + +//===----------------------------------------------------------------------===// +// Integer Operand Expansion +//===----------------------------------------------------------------------===// + +/// ExpandIntegerOperand - This method is called when the specified operand of +/// the specified node is found to need expansion. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { + LLVM_DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; + #endif + report_fatal_error("Do not know how to expand this operator's operand!"); + + case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; + case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break; + case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; + case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break; + case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; + case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; + case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: Res = ExpandIntOp_Shift(N); break; + case ISD::RETURNADDR: + case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break; + + case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. 
+ if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code +/// is shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, + SDValue &NewRHS, + ISD::CondCode &CCCode, + const SDLoc &dl) { + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(NewLHS, LHSLo, LHSHi); + GetExpandedInteger(NewRHS, RHSLo, RHSHi); + + if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) { + if (RHSLo == RHSHi) { + if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) { + if (RHSCST->isAllOnesValue()) { + // Equality comparison to -1. + NewLHS = DAG.getNode(ISD::AND, dl, + LHSLo.getValueType(), LHSLo, LHSHi); + NewRHS = RHSLo; + return; + } + } + } + + NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo); + NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi); + NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS); + NewRHS = DAG.getConstant(0, dl, NewLHS.getValueType()); + return; + } + + // If this is a comparison of the sign bit, just look at the top part. + // X > -1, x < 0 + if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS)) + if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0 + (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1 + NewLHS = LHSHi; + NewRHS = RHSHi; + return; + } + + // FIXME: This generated code sucks. + ISD::CondCode LowCC; + switch (CCCode) { + default: llvm_unreachable("Unknown integer setcc!"); + case ISD::SETLT: + case ISD::SETULT: LowCC = ISD::SETULT; break; + case ISD::SETGT: + case ISD::SETUGT: LowCC = ISD::SETUGT; break; + case ISD::SETLE: + case ISD::SETULE: LowCC = ISD::SETULE; break; + case ISD::SETGE: + case ISD::SETUGE: LowCC = ISD::SETUGE; break; + } + + // LoCmp = lo(op1) < lo(op2) // Always unsigned comparison + // HiCmp = hi(op1) < hi(op2) // Signedness depends on operands + // dest = hi(op1) == hi(op2) ? LoCmp : HiCmp; + + // NOTE: on targets without efficient SELECT of bools, we can always use + // this identity: (B1 ? 
B2 : B3) --> (B1 & B2)|(!B1&B3) + TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, + nullptr); + SDValue LoCmp, HiCmp; + if (TLI.isTypeLegal(LHSLo.getValueType()) && + TLI.isTypeLegal(RHSLo.getValueType())) + LoCmp = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), LHSLo, + RHSLo, LowCC, false, DagCombineInfo, dl); + if (!LoCmp.getNode()) + LoCmp = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, + RHSLo, LowCC); + if (TLI.isTypeLegal(LHSHi.getValueType()) && + TLI.isTypeLegal(RHSHi.getValueType())) + HiCmp = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi, + RHSHi, CCCode, false, DagCombineInfo, dl); + if (!HiCmp.getNode()) + HiCmp = + DAG.getNode(ISD::SETCC, dl, getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, DAG.getCondCode(CCCode)); + + ConstantSDNode *LoCmpC = dyn_cast<ConstantSDNode>(LoCmp.getNode()); + ConstantSDNode *HiCmpC = dyn_cast<ConstantSDNode>(HiCmp.getNode()); + + bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE || + CCCode == ISD::SETUGE || CCCode == ISD::SETULE); + + if ((EqAllowed && (HiCmpC && HiCmpC->isNullValue())) || + (!EqAllowed && ((HiCmpC && (HiCmpC->getAPIntValue() == 1)) || + (LoCmpC && LoCmpC->isNullValue())))) { + // For LE / GE, if high part is known false, ignore the low part. + // For LT / GT: if low part is known false, return the high part. + // if high part is known true, ignore the low part. + NewLHS = HiCmp; + NewRHS = SDValue(); + return; + } + + if (LHSHi == RHSHi) { + // Comparing the low bits is enough. + NewLHS = LoCmp; + NewRHS = SDValue(); + return; + } + + // Lower with SETCCCARRY if the target supports it. + EVT HiVT = LHSHi.getValueType(); + EVT ExpandVT = TLI.getTypeToExpandTo(*DAG.getContext(), HiVT); + bool HasSETCCCARRY = TLI.isOperationLegalOrCustom(ISD::SETCCCARRY, ExpandVT); + + // FIXME: Make all targets support this, then remove the other lowering. + if (HasSETCCCARRY) { + // SETCCCARRY can detect < and >= directly. For > and <=, flip + // operands and condition code. + bool FlipOperands = false; + switch (CCCode) { + case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break; + case ISD::SETUGT: CCCode = ISD::SETULT; FlipOperands = true; break; + case ISD::SETLE: CCCode = ISD::SETGE; FlipOperands = true; break; + case ISD::SETULE: CCCode = ISD::SETUGE; FlipOperands = true; break; + default: break; + } + if (FlipOperands) { + std::swap(LHSLo, RHSLo); + std::swap(LHSHi, RHSHi); + } + // Perform a wide subtraction, feeding the carry from the low part into + // SETCCCARRY. The SETCCCARRY operation is essentially looking at the high + // part of the result of LHS - RHS. It is negative iff LHS < RHS. It is + // zero or positive iff LHS >= RHS. 
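+    // A sketch for a 64-bit SETLT with i32 parts: USUBO produces the borrow
+    // of LHSLo - RHSLo, and SETCCCARRY folds it into the high-word compare,
+    // observing the sign of the full 64-bit difference.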
+ EVT LoVT = LHSLo.getValueType(); + SDVTList VTList = DAG.getVTList(LoVT, getSetCCResultType(LoVT)); + SDValue LowCmp = DAG.getNode(ISD::USUBO, dl, VTList, LHSLo, RHSLo); + SDValue Res = DAG.getNode(ISD::SETCCCARRY, dl, getSetCCResultType(HiVT), + LHSHi, RHSHi, LowCmp.getValue(1), + DAG.getCondCode(CCCode)); + NewLHS = Res; + NewRHS = SDValue(); + return; + } + + NewLHS = TLI.SimplifySetCC(getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ, + false, DagCombineInfo, dl); + if (!NewLHS.getNode()) + NewLHS = + DAG.getSetCC(dl, getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ); + NewLHS = DAG.getSelect(dl, LoCmp.getValueType(), NewLHS, LoCmp, HiCmp); + NewRHS = SDValue(); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (!NewRHS.getNode()) { + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)), 0); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (!NewRHS.getNode()) { + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)), 0); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); + + // If ExpandSetCCOperands returned a scalar, use it. + if (!NewRHS.getNode()) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return SDValue( + DAG.UpdateNodeOperands(N, NewLHS, NewRHS, DAG.getCondCode(CCCode)), 0); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + SDLoc dl = SDLoc(N); + + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(LHS, LHSLo, LHSHi); + GetExpandedInteger(RHS, RHSLo, RHSHi); + + // Expand to a SUBE for the low part and a smaller SETCCCARRY for the high. + SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), Carry.getValueType()); + SDValue LowCmp = DAG.getNode(ISD::SUBCARRY, dl, VTList, LHSLo, RHSLo, Carry); + return DAG.getNode(ISD::SETCCCARRY, dl, N->getValueType(0), LHSHi, RHSHi, + LowCmp.getValue(1), Cond); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { + // The value being shifted is legal, but the shift amount is too big. 
+  // It follows that either the result of the shift is undefined, or the
+  // upper half of the shift amount is zero. Just use the lower half.
+  SDValue Lo, Hi;
+  GetExpandedInteger(N->getOperand(1), Lo, Hi);
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
+  // The argument of the RETURNADDR / FRAMEADDR builtin is a 32-bit constant,
+  // which causes problems on 8/16-bit targets. Just truncate the constant to
+  // a valid type.
+  SDValue Lo, Hi;
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+  SDValue Op = N->getOperand(0);
+  EVT DstVT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+         "Don't know how to expand this SINT_TO_FP!");
+  TargetLowering::MakeLibCallOptions CallOptions;
+  CallOptions.setSExt(true);
+  return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+  if (ISD::isNormalStore(N))
+    return ExpandOp_NormalStore(N, OpNo);
+
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  assert(OpNo == 1 && "Can only expand the stored value so far");
+
+  EVT VT = N->getOperand(1).getValueType();
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue Ch = N->getChain();
+  SDValue Ptr = N->getBasePtr();
+  unsigned Alignment = N->getAlignment();
+  MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
+  AAMDNodes AAInfo = N->getAAInfo();
+  SDLoc dl(N);
+  SDValue Lo, Hi;
+
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+  if (N->getMemoryVT().bitsLE(NVT)) {
+    GetExpandedInteger(N->getValue(), Lo, Hi);
+    return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+                             N->getMemoryVT(), Alignment, MMOFlags, AAInfo);
+  }
+
+  if (DAG.getDataLayout().isLittleEndian()) {
+    // Little-endian - low bits are at low addresses.
+    GetExpandedInteger(N->getValue(), Lo, Hi);
+
+    Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
+                      AAInfo);
+
+    unsigned ExcessBits =
+        N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+    EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+    // Increment the pointer to the other half.
+    unsigned IncrementSize = NVT.getSizeInBits()/8;
+    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+    Hi = DAG.getTruncStore(
+        Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
+        MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+  }
+
+  // Big-endian - high bits are at low addresses. Favor aligned stores at
+  // the cost of some bit-fiddling.
+  GetExpandedInteger(N->getValue(), Lo, Hi);
+
+  EVT ExtVT = N->getMemoryVT();
+  unsigned EBytes = ExtVT.getStoreSize();
+  unsigned IncrementSize = NVT.getSizeInBits()/8;
+  unsigned ExcessBits = (EBytes - IncrementSize)*8;
+  EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+                               ExtVT.getSizeInBits() - ExcessBits);
+
+  if (ExcessBits < NVT.getSizeInBits()) {
+    // Transfer high bits from the top of Lo to the bottom of Hi.
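+    // E.g. an i48 truncating store with i32 parts: Hi holds 16 meaningful
+    // bits, so shift it up by 32 - 16 = 16 and pull the top 16 bits of Lo in
+    // below, letting the first store write value bits 47..16 as one aligned
+    // i32.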
+    Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+                     DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl,
+                                     TLI.getPointerTy(DAG.getDataLayout())));
+    Hi = DAG.getNode(
+        ISD::OR, dl, NVT, Hi,
+        DAG.getNode(ISD::SRL, dl, NVT, Lo,
+                    DAG.getConstant(ExcessBits, dl,
+                                    TLI.getPointerTy(DAG.getDataLayout()))));
+  }
+
+  // Store both the high bits and maybe some of the low bits.
+  Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT, Alignment,
+                         MMOFlags, AAInfo);
+
+  // Increment the pointer to the other half.
+  Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+  // Store the lowest ExcessBits bits in the second half.
+  Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
+                         N->getPointerInfo().getWithOffset(IncrementSize),
+                         EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+                         MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
+  SDValue InL, InH;
+  GetExpandedInteger(N->getOperand(0), InL, InH);
+  // Just truncate the low part of the source.
+  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), InL);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
+  SDValue Op = N->getOperand(0);
+  EVT SrcVT = Op.getValueType();
+  EVT DstVT = N->getValueType(0);
+  SDLoc dl(N);
+
+  // The following optimization is valid only if every value in SrcVT (when
+  // treated as signed) is representable in DstVT. Check that the mantissa
+  // size of DstVT is at least the number of bits in SrcVT minus 1.
+  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT);
+  if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 &&
+      TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
+    // Do a signed conversion then adjust the result.
+    SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
+    SignedConv = TLI.LowerOperation(SignedConv, DAG);
+
+    // The result of the signed conversion needs adjusting if the 'sign bit' of
+    // the incoming integer was set. To handle this, we dynamically test to see
+    // if it is set, and, if so, add a fudge factor.
+
+    const uint64_t F32TwoE32  = 0x4F800000ULL;
+    const uint64_t F32TwoE64  = 0x5F800000ULL;
+    const uint64_t F32TwoE128 = 0x7F800000ULL;
+
+    APInt FF(32, 0);
+    if (SrcVT == MVT::i32)
+      FF = APInt(32, F32TwoE32);
+    else if (SrcVT == MVT::i64)
+      FF = APInt(32, F32TwoE64);
+    else if (SrcVT == MVT::i128)
+      FF = APInt(32, F32TwoE128);
+    else
+      llvm_unreachable("Unsupported UINT_TO_FP!");
+
+    // Check whether the sign bit is set.
+    SDValue Lo, Hi;
+    GetExpandedInteger(Op, Lo, Hi);
+    SDValue SignSet = DAG.getSetCC(dl,
+                                   getSetCCResultType(Hi.getValueType()),
+                                   Hi,
+                                   DAG.getConstant(0, dl, Hi.getValueType()),
+                                   ISD::SETLT);
+
+    // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+    SDValue FudgePtr =
+        DAG.getConstantPool(ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+                            TLI.getPointerTy(DAG.getDataLayout()));
+
+    // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
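+    // (Offset 0 or 4 selects which 32-bit half of the 64-bit pool entry is
+    // loaded: the f32 fudge constant 2^N when the sign bit was set, or 0.0
+    // when it was not; the swap below accounts for endianness.)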
+ SDValue Zero = DAG.getIntPtrConstant(0, dl); + SDValue Four = DAG.getIntPtrConstant(4, dl); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Zero, Four); + SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, + Zero, Four); + unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); + FudgePtr = DAG.getNode(ISD::ADD, dl, FudgePtr.getValueType(), + FudgePtr, Offset); + Alignment = std::min(Alignment, 4u); + + // Load the value out, extending it from f32 to the destination float type. + // FIXME: Avoid the extend by constructing the right constant pool? + SDValue Fudge = DAG.getExtLoad( + ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, + Alignment); + return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); + } + + // Otherwise, use a libcall. + RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && + "Don't know how to expand this UINT_TO_FP!"); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, dl).first; +} + +SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { + SDLoc dl(N); + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, + cast<AtomicSDNode>(N)->getMemoryVT(), + N->getOperand(0), + N->getOperand(1), N->getOperand(2), + cast<AtomicSDNode>(N)->getMemOperand()); + return Swap.getValue(1); +} + + +SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + unsigned OutNumElems = OutVT.getVectorNumElements(); + EVT NOutVTElem = NOutVT.getVectorElementType(); + + SDLoc dl(N); + SDValue BaseIdx = N->getOperand(1); + + SDValue InOp0 = N->getOperand(0); + if (getTypeAction(InOp0.getValueType()) == TargetLowering::TypePromoteInteger) + InOp0 = GetPromotedInteger(N->getOperand(0)); + + EVT InVT = InOp0.getValueType(); + + SmallVector<SDValue, 8> Ops; + Ops.reserve(OutNumElems); + for (unsigned i = 0; i != OutNumElems; ++i) { + + // Extract the element from the original vector. + SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), + BaseIdx, DAG.getConstant(i, dl, BaseIdx.getValueType())); + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + InVT.getVectorElementType(), N->getOperand(0), Index); + + SDValue Op = DAG.getAnyExtOrTrunc(Ext, dl, NOutVTElem); + // Insert the converted element to the new vector. 
+ Ops.push_back(Op); + } + + return DAG.getBuildVector(NOutVT, dl, Ops); +} + + +SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { + ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N); + EVT VT = N->getValueType(0); + SDLoc dl(N); + + ArrayRef<int> NewMask = SV->getMask().slice(0, VT.getVectorNumElements()); + + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + SDValue V1 = GetPromotedInteger(N->getOperand(1)); + EVT OutVT = V0.getValueType(); + + return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask); +} + + +SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + unsigned NumElems = N->getNumOperands(); + EVT NOutVTElem = NOutVT.getVectorElementType(); + + SDLoc dl(N); + + SmallVector<SDValue, 8> Ops; + Ops.reserve(NumElems); + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Op; + // BUILD_VECTOR integer operand types are allowed to be larger than the + // result's element type. This may still be true after the promotion. For + // example, we might be promoting (<v?i1> = BV <i32>, <i32>, ...) to + // (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>. + if (N->getOperand(i).getValueType().bitsLT(NOutVTElem)) + Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); + else + Op = N->getOperand(i); + Ops.push_back(Op); + } + + return DAG.getBuildVector(NOutVT, dl, Ops); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { + + SDLoc dl(N); + + assert(!N->getOperand(0).getValueType().isVector() && + "Input must be a scalar"); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + EVT NOutVTElem = NOutVT.getVectorElementType(); + + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0)); + + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) { + SDLoc dl(N); + + SDValue SplatVal = N->getOperand(0); + + assert(!SplatVal.getValueType().isVector() && "Input must be a scalar"); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "Type must be promoted to a vector type"); + EVT NOutElemVT = NOutVT.getVectorElementType(); + + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal); + + return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { + SDLoc dl(N); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + + EVT OutElemTy = NOutVT.getVectorElementType(); + + unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements(); + unsigned NumOutElem = NOutVT.getVectorNumElements(); + unsigned NumOperands = N->getNumOperands(); + assert(NumElem * NumOperands == NumOutElem && + "Unexpected number of elements"); + + // Take the elements from the first vector. 
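+  // (and then from each subsequent operand in turn, extending every scalar
+  // to the promoted element type.)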
+  SmallVector<SDValue, 8> Ops(NumOutElem);
+  for (unsigned i = 0; i < NumOperands; ++i) {
+    SDValue Op = N->getOperand(i);
+    if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteInteger)
+      Op = GetPromotedInteger(Op);
+    EVT SclrTy = Op.getValueType().getVectorElementType();
+    assert(NumElem == Op.getValueType().getVectorNumElements() &&
+           "Unexpected number of elements");
+
+    for (unsigned j = 0; j < NumElem; ++j) {
+      SDValue Ext = DAG.getNode(
+          ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op,
+          DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+      Ops[i * NumElem + j] = DAG.getAnyExtOrTrunc(Ext, dl, OutElemTy);
+    }
+  }
+
+  return DAG.getBuildVector(NOutVT, dl, Ops);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  assert(NVT.isVector() && "This type must be promoted to a vector type");
+
+  SDLoc dl(N);
+
+  // For operands whose TypeAction is to promote, extend the promoted node
+  // appropriately (ZERO_EXTEND or SIGN_EXTEND) from the original pre-promotion
+  // type, and then construct a new *_EXTEND_VECTOR_INREG node to the promote-to
+  // type.
+  if (getTypeAction(N->getOperand(0).getValueType())
+      == TargetLowering::TypePromoteInteger) {
+    SDValue Promoted;
+
+    switch(N->getOpcode()) {
+    case ISD::SIGN_EXTEND_VECTOR_INREG:
+      Promoted = SExtPromotedInteger(N->getOperand(0));
+      break;
+    case ISD::ZERO_EXTEND_VECTOR_INREG:
+      Promoted = ZExtPromotedInteger(N->getOperand(0));
+      break;
+    case ISD::ANY_EXTEND_VECTOR_INREG:
+      Promoted = GetPromotedInteger(N->getOperand(0));
+      break;
+    default:
+      llvm_unreachable("Node has unexpected Opcode");
+    }
+    return DAG.getNode(N->getOpcode(), dl, NVT, Promoted);
+  }
+
+  // Directly extend to the appropriate transform-to type.
+  return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
+  EVT OutVT = N->getValueType(0);
+  EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+  assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+  EVT NOutVTElem = NOutVT.getVectorElementType();
+
+  SDLoc dl(N);
+  SDValue V0 = GetPromotedInteger(N->getOperand(0));
+
+  SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
+                                 NOutVTElem, N->getOperand(1));
+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT,
+                     V0, ConvElem, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) {
+  // The VECREDUCE result size may be larger than the element size, so
+  // we can simply change the result type.
+  SDLoc dl(N);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDLoc dl(N);
+  SDValue V0 = GetPromotedInteger(N->getOperand(0));
+  SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl,
+                                  TLI.getVectorIdxTy(DAG.getDataLayout()));
+  SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+    V0->getValueType(0).getScalarType(), V0, V1);
+
+  // EXTRACT_VECTOR_ELT can return types which are wider than the incoming
+  // element types. If this is the case then we need to extend the outgoing
+  // value and not truncate it.
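+  // (getAnyExtOrTrunc emits ANY_EXTEND or TRUNCATE as appropriate, and is
+  // effectively a no-op when the types already match.)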
+ return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) { + SDLoc dl(N); + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + MVT InVT = V0.getValueType().getSimpleVT(); + MVT OutVT = MVT::getVectorVT(InVT.getVectorElementType(), + N->getValueType(0).getVectorNumElements()); + SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, V0, N->getOperand(1)); + return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { + SDLoc dl(N); + unsigned NumElems = N->getNumOperands(); + + EVT RetSclrTy = N->getValueType(0).getVectorElementType(); + + SmallVector<SDValue, 8> NewOps; + NewOps.reserve(NumElems); + + // For each incoming vector + for (unsigned VecIdx = 0; VecIdx != NumElems; ++VecIdx) { + SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx)); + EVT SclrTy = Incoming->getValueType(0).getVectorElementType(); + unsigned NumElem = Incoming->getValueType(0).getVectorNumElements(); + + for (unsigned i=0; i<NumElem; ++i) { + // Extract element from incoming vector + SDValue Ex = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); + NewOps.push_back(Tr); + } + } + + return DAG.getBuildVector(N->getValueType(0), dl, NewOps); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp new file mode 100644 index 0000000000000..b596c174a2877 --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -0,0 +1,1092 @@ +//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::LegalizeTypes method. It transforms +// an arbitrary well-formed SelectionDAG to only consist of legal types. This +// is common code shared among the LegalizeTypes*.cpp files. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "SDNodeDbgValue.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "legalize-types" + +static cl::opt<bool> +EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden); + +/// Do extensive, expensive, sanity checking. +void DAGTypeLegalizer::PerformExpensiveChecks() { + // If a node is not processed, then none of its values should be mapped by any + // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. + + // If a node is processed, then each value with an illegal type must be mapped + // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. + // Values with a legal type may be mapped by ReplacedValues, but not by any of + // the other maps. + + // Note that these invariants may not hold momentarily when processing a node: + // the node being processed may be put in a map before being marked Processed. 
+ + // Note that it is possible to have nodes marked NewNode in the DAG. This can + // occur in two ways. Firstly, a node may be created during legalization but + // never passed to the legalization core. This is usually due to the implicit + // folding that occurs when using the DAG.getNode operators. Secondly, a new + // node may be passed to the legalization core, but when analyzed may morph + // into a different node, leaving the original node as a NewNode in the DAG. + // A node may morph if one of its operands changes during analysis. Whether + // it actually morphs or not depends on whether, after updating its operands, + // it is equivalent to an existing node: if so, it morphs into that existing + // node (CSE). An operand can change during analysis if the operand is a new + // node that morphs, or it is a processed value that was mapped to some other + // value (as recorded in ReplacedValues) in which case the operand is turned + // into that other value. If a node morphs then the node it morphed into will + // be used instead of it for legalization, however the original node continues + // to live on in the DAG. + // The conclusion is that though there may be nodes marked NewNode in the DAG, + // all uses of such nodes are also marked NewNode: the result is a fungus of + // NewNodes growing on top of the useful nodes, and perhaps using them, but + // not used by them. + + // If a value is mapped by ReplacedValues, then it must have no uses, except + // by nodes marked NewNode (see above). + + // The final node obtained by mapping by ReplacedValues is not marked NewNode. + // Note that ReplacedValues should be applied iteratively. + + // Note that the ReplacedValues map may also map deleted nodes (by iterating + // over the DAG we never dereference deleted nodes). This means that it may + // also map nodes marked NewNode if the deallocated memory was reallocated as + // another node, and that new node was not seen by the LegalizeTypes machinery + // (for example because it was created but not used). In general, we cannot + // distinguish between new nodes and deleted nodes. + SmallVector<SDNode*, 16> NewNodes; + for (SDNode &Node : DAG.allnodes()) { + // Remember nodes marked NewNode - they are subject to extra checking below. + if (Node.getNodeId() == NewNode) + NewNodes.push_back(&Node); + + for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) { + SDValue Res(&Node, i); + bool Failed = false; + // Don't create a value in map. + auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0; + + unsigned Mapped = 0; + if (ResId && (ReplacedValues.find(ResId) != ReplacedValues.end())) { + Mapped |= 1; + // Check that remapped values are only used by nodes marked NewNode. + for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); + UI != UE; ++UI) + if (UI.getUse().getResNo() == i) + assert(UI->getNodeId() == NewNode && + "Remapped value has non-trivial use!"); + + // Check that the final result of applying ReplacedValues is not + // marked NewNode. 
+          auto NewValId = ReplacedValues[ResId];
+          auto I = ReplacedValues.find(NewValId);
+          while (I != ReplacedValues.end()) {
+            NewValId = I->second;
+            I = ReplacedValues.find(NewValId);
+          }
+          SDValue NewVal = getSDValue(NewValId);
+          (void)NewVal;
+          assert(NewVal.getNode()->getNodeId() != NewNode &&
+                 "ReplacedValues maps to a new node!");
+        }
+        if (ResId && PromotedIntegers.find(ResId) != PromotedIntegers.end())
+          Mapped |= 2;
+        if (ResId && SoftenedFloats.find(ResId) != SoftenedFloats.end())
+          Mapped |= 4;
+        if (ResId && ScalarizedVectors.find(ResId) != ScalarizedVectors.end())
+          Mapped |= 8;
+        if (ResId && ExpandedIntegers.find(ResId) != ExpandedIntegers.end())
+          Mapped |= 16;
+        if (ResId && ExpandedFloats.find(ResId) != ExpandedFloats.end())
+          Mapped |= 32;
+        if (ResId && SplitVectors.find(ResId) != SplitVectors.end())
+          Mapped |= 64;
+        if (ResId && WidenedVectors.find(ResId) != WidenedVectors.end())
+          Mapped |= 128;
+        if (ResId && PromotedFloats.find(ResId) != PromotedFloats.end())
+          Mapped |= 256;
+
+      if (Node.getNodeId() != Processed) {
+        // Since we allow ReplacedValues to map deleted nodes, it may map nodes
+        // marked NewNode too, since a deleted node may have been reallocated as
+        // another node that has not been seen by the LegalizeTypes machinery.
+        if ((Node.getNodeId() == NewNode && Mapped > 1) ||
+            (Node.getNodeId() != NewNode && Mapped != 0)) {
+          dbgs() << "Unprocessed value in a map!";
+          Failed = true;
+        }
+      } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) {
+        if (Mapped > 1) {
+          dbgs() << "Value with legal type was transformed!";
+          Failed = true;
+        }
+      } else {
+        if (Mapped == 0) {
+          dbgs() << "Processed value not in any map!";
+          Failed = true;
+        } else if (Mapped & (Mapped - 1)) {
+          dbgs() << "Value in multiple maps!";
+          Failed = true;
+        }
+      }
+
+      if (Failed) {
+        if (Mapped & 1)
+          dbgs() << " ReplacedValues";
+        if (Mapped & 2)
+          dbgs() << " PromotedIntegers";
+        if (Mapped & 4)
+          dbgs() << " SoftenedFloats";
+        if (Mapped & 8)
+          dbgs() << " ScalarizedVectors";
+        if (Mapped & 16)
+          dbgs() << " ExpandedIntegers";
+        if (Mapped & 32)
+          dbgs() << " ExpandedFloats";
+        if (Mapped & 64)
+          dbgs() << " SplitVectors";
+        if (Mapped & 128)
+          dbgs() << " WidenedVectors";
+        if (Mapped & 256)
+          dbgs() << " PromotedFloats";
+        dbgs() << "\n";
+        llvm_unreachable(nullptr);
+      }
+    }
+  }
+
+  // Check that NewNodes are only used by other NewNodes.
+  for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+    SDNode *N = NewNodes[i];
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+         UI != UE; ++UI)
+      assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+  }
+}
+
+/// This is the main entry point for the type legalizer. This does a top-down
+/// traversal of the dag, legalizing types as it goes. Returns "true" if it made
+/// any changes.
+bool DAGTypeLegalizer::run() {
+  bool Changed = false;
+
+  // Create a dummy node (which is not added to allnodes), that adds a reference
+  // to the root node, preventing it from being deleted, and tracking any
+  // changes of the root.
+  HandleSDNode Dummy(DAG.getRoot());
+  Dummy.setNodeId(Unanalyzed);
+
+  // The root of the dag may dangle to deleted nodes until the type legalizer is
+  // done. Set it to null to avoid confusion.
+  DAG.setRoot(SDValue());
+
+  // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
+  // (and remembering them) if they are leaves and assigning 'Unanalyzed' if
+  // non-leaves.
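+  // (From here on, a positive NodeId counts the node's operands that are not
+  // yet Processed; it reaches ReadyToProcess, i.e. zero, once the node itself
+  // can be legalized.)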
+ for (SDNode &Node : DAG.allnodes()) { + if (Node.getNumOperands() == 0) { + AddToWorklist(&Node); + } else { + Node.setNodeId(Unanalyzed); + } + } + + // Now that we have a set of nodes to process, handle them all. + while (!Worklist.empty()) { +#ifndef EXPENSIVE_CHECKS + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + SDNode *N = Worklist.back(); + Worklist.pop_back(); + assert(N->getNodeId() == ReadyToProcess && + "Node should be ready if on worklist!"); + + LLVM_DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG)); + if (IgnoreNodeResults(N)) { + LLVM_DEBUG(dbgs() << "Ignoring node results\n"); + goto ScanOperands; + } + + // Scan the values produced by the node, checking to see if any result + // types are illegal. + for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { + EVT ResultVT = N->getValueType(i); + LLVM_DEBUG(dbgs() << "Analyzing result type: " << ResultVT.getEVTString() + << "\n"); + switch (getTypeAction(ResultVT)) { + case TargetLowering::TypeLegal: + LLVM_DEBUG(dbgs() << "Legal result type\n"); + break; + // The following calls must take care of *all* of the node's results, + // not just the illegal result they were passed (this includes results + // with a legal type). Results can be remapped using ReplaceValueWith, + // or their promoted/expanded/etc values registered in PromotedIntegers, + // ExpandedIntegers etc. + case TargetLowering::TypePromoteInteger: + PromoteIntegerResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeExpandInteger: + ExpandIntegerResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeSoftenFloat: + SoftenFloatResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeExpandFloat: + ExpandFloatResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeScalarizeVector: + ScalarizeVectorResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeSplitVector: + SplitVectorResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeWidenVector: + WidenVectorResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypePromoteFloat: + PromoteFloatResult(N, i); + Changed = true; + goto NodeDone; + } + } + +ScanOperands: + // Scan the operand list for the node, handling any nodes with operands that + // are illegal. + { + unsigned NumOperands = N->getNumOperands(); + bool NeedsReanalyzing = false; + unsigned i; + for (i = 0; i != NumOperands; ++i) { + if (IgnoreNodeResults(N->getOperand(i).getNode())) + continue; + + const auto Op = N->getOperand(i); + LLVM_DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG)); + EVT OpVT = Op.getValueType(); + switch (getTypeAction(OpVT)) { + case TargetLowering::TypeLegal: + LLVM_DEBUG(dbgs() << "Legal operand\n"); + continue; + // The following calls must either replace all of the node's results + // using ReplaceValueWith, and return "false"; or update the node's + // operands in place, and return "true". 
+ case TargetLowering::TypePromoteInteger: + NeedsReanalyzing = PromoteIntegerOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeExpandInteger: + NeedsReanalyzing = ExpandIntegerOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeSoftenFloat: + NeedsReanalyzing = SoftenFloatOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeExpandFloat: + NeedsReanalyzing = ExpandFloatOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeScalarizeVector: + NeedsReanalyzing = ScalarizeVectorOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeSplitVector: + NeedsReanalyzing = SplitVectorOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeWidenVector: + NeedsReanalyzing = WidenVectorOperand(N, i); + Changed = true; + break; + case TargetLowering::TypePromoteFloat: + NeedsReanalyzing = PromoteFloatOperand(N, i); + Changed = true; + break; + } + break; + } + + // The sub-method updated N in place. Check to see if any operands are new, + // and if so, mark them. If the node needs revisiting, don't add all users + // to the worklist etc. + if (NeedsReanalyzing) { + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + + N->setNodeId(NewNode); + // Recompute the NodeId and correct processed operands, adding the node to + // the worklist if ready. + SDNode *M = AnalyzeNewNode(N); + if (M == N) + // The node didn't morph - nothing special to do, it will be revisited. + continue; + + // The node morphed - this is equivalent to legalizing by replacing every + // value of N with the corresponding value of M. So do that now. + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + // Replacing the value takes care of remapping the new value. + ReplaceValueWith(SDValue(N, i), SDValue(M, i)); + assert(N->getNodeId() == NewNode && "Unexpected node state!"); + // The node continues to live on as part of the NewNode fungus that + // grows on top of the useful nodes. Nothing more needs to be done + // with it - move on to the next node. + continue; + } + + if (i == NumOperands) { + LLVM_DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); + dbgs() << "\n"); + } + } +NodeDone: + + // If we reach here, the node was processed, potentially creating new nodes. + // Mark it as processed and add its users to the worklist as appropriate. + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(Processed); + + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + int NodeId = User->getNodeId(); + + // This node has two options: it can either be a new node or its Node ID + // may be a count of the number of operands it has that are not ready. + if (NodeId > 0) { + User->setNodeId(NodeId-1); + + // If this was the last use it was waiting on, add it to the ready list. + if (NodeId-1 == ReadyToProcess) + Worklist.push_back(User); + continue; + } + + // If this is an unreachable new node, then ignore it. If it ever becomes + // reachable by being used by a newly created node then it will be handled + // by AnalyzeNewNode. + if (NodeId == NewNode) + continue; + + // Otherwise, this node is new: this is the first operand of it that + // became ready. Its new NodeId is the number of operands it has minus 1 + // (as this node is now processed). 
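+    // (E.g. a user with three operands whose first operand just finished gets
+    // NodeId 2 here, and reaches the worklist only after its remaining two
+    // operands are processed.)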
+ assert(NodeId == Unanalyzed && "Unknown node ID!"); + User->setNodeId(User->getNumOperands() - 1); + + // If the node only has a single operand, it is now ready. + if (User->getNumOperands() == 1) + Worklist.push_back(User); + } + } + +#ifndef EXPENSIVE_CHECKS + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + // If the root changed (e.g. it was a dead load) update the root. + DAG.setRoot(Dummy.getValue()); + + // Remove dead nodes. This is important to do for cleanliness but also before + // the checking loop below. Implicit folding by the DAG.getNode operators and + // node morphing can cause unreachable nodes to be around with their flags set + // to new. + DAG.RemoveDeadNodes(); + + // In a debug build, scan all the nodes to make sure we found them all. This + // ensures that there are no cycles and that everything got processed. +#ifndef NDEBUG + for (SDNode &Node : DAG.allnodes()) { + bool Failed = false; + + // Check that all result types are legal. + if (!IgnoreNodeResults(&Node)) + for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i) + if (!isTypeLegal(Node.getValueType(i))) { + dbgs() << "Result type " << i << " illegal: "; + Node.dump(&DAG); + Failed = true; + } + + // Check that all operand types are legal. + for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i) + if (!IgnoreNodeResults(Node.getOperand(i).getNode()) && + !isTypeLegal(Node.getOperand(i).getValueType())) { + dbgs() << "Operand type " << i << " illegal: "; + Node.getOperand(i).dump(&DAG); + Failed = true; + } + + if (Node.getNodeId() != Processed) { + if (Node.getNodeId() == NewNode) + dbgs() << "New node not analyzed?\n"; + else if (Node.getNodeId() == Unanalyzed) + dbgs() << "Unanalyzed node not noticed?\n"; + else if (Node.getNodeId() > 0) + dbgs() << "Operand not processed?\n"; + else if (Node.getNodeId() == ReadyToProcess) + dbgs() << "Not added to worklist?\n"; + Failed = true; + } + + if (Failed) { + Node.dump(&DAG); dbgs() << "\n"; + llvm_unreachable(nullptr); + } + } +#endif + + return Changed; +} + +/// The specified node is the root of a subtree of potentially new nodes. +/// Correct any processed operands (this may change the node) and calculate the +/// NodeId. If the node itself changes to a processed node, it is not remapped - +/// the caller needs to take care of this. Returns the potentially changed node. +SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { + // If this was an existing node that is already done, we're done. + if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed) + return N; + + // Okay, we know that this node is new. Recursively walk all of its operands + // to see if they are new also. The depth of this walk is bounded by the size + // of the new tree that was constructed (usually 2-3 nodes), so we don't worry + // about revisiting of nodes. + // + // As we walk the operands, keep track of the number of nodes that are + // processed. If non-zero, this will become the new nodeid of this node. + // Operands may morph when they are analyzed. If so, the node will be + // updated after all operands have been analyzed. Since this is rare, + // the code tries to minimize overhead in the non-morphing case. + + std::vector<SDValue> NewOps; + unsigned NumProcessed = 0; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue OrigOp = N->getOperand(i); + SDValue Op = OrigOp; + + AnalyzeNewValue(Op); // Op may morph. 
+
+    if (Op.getNode()->getNodeId() == Processed)
+      ++NumProcessed;
+
+    if (!NewOps.empty()) {
+      // Some previous operand changed. Add this one to the list.
+      NewOps.push_back(Op);
+    } else if (Op != OrigOp) {
+      // This is the first operand to change - add all operands so far.
+      NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i);
+      NewOps.push_back(Op);
+    }
+  }
+
+  // Some operands changed - update the node.
+  if (!NewOps.empty()) {
+    SDNode *M = DAG.UpdateNodeOperands(N, NewOps);
+    if (M != N) {
+      // The node morphed into a different node. Normally for this to happen
+      // the original node would have to be marked NewNode. However this can
+      // in theory momentarily not be the case while ReplaceValueWith is doing
+      // its stuff. Mark the original node NewNode to help sanity checking.
+      N->setNodeId(NewNode);
+      if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed)
+        // It morphed into a previously analyzed node - nothing more to do.
+        return M;
+
+      // It morphed into a different new node. Do the equivalent of passing
+      // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need
+      // to remap the operands, since they are the same as the operands we
+      // remapped above.
+      N = M;
+    }
+  }
+
+  // Calculate the NodeId.
+  N->setNodeId(N->getNumOperands() - NumProcessed);
+  if (N->getNodeId() == ReadyToProcess)
+    Worklist.push_back(N);
+
+  return N;
+}
+
+/// Call AnalyzeNewNode, updating the node in Val if needed.
+/// If the node changes to a processed node, then remap it.
+void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
+  Val.setNode(AnalyzeNewNode(Val.getNode()));
+  if (Val.getNode()->getNodeId() == Processed)
+    // We were passed a processed node, or it morphed into one - remap it.
+    RemapValue(Val);
+}
+
+/// If the specified value was already legalized to another value,
+/// replace it by that value.
+void DAGTypeLegalizer::RemapValue(SDValue &V) {
+  auto Id = getTableId(V);
+  V = getSDValue(Id);
+}
+
+void DAGTypeLegalizer::RemapId(TableId &Id) {
+  auto I = ReplacedValues.find(Id);
+  if (I != ReplacedValues.end()) {
+    assert(Id != I->second && "Id is mapped to itself.");
+    // Use path compression to speed up future lookups if values get multiply
+    // replaced with other values.
+    RemapId(I->second);
+    Id = I->second;
+
+    // Note that if N = IdToValueMap[Id], it is possible to have
+    // N.getNode()->getNodeId() == NewNode at this point because it is
+    // possible for a node to be put in the map before being processed.
+  }
+}
+
+namespace {
+  /// This class is a DAGUpdateListener that listens for updates to nodes and
+  /// recomputes their ready state.
+  class NodeUpdateListener : public SelectionDAG::DAGUpdateListener {
+    DAGTypeLegalizer &DTL;
+    SmallSetVector<SDNode*, 16> &NodesToAnalyze;
+  public:
+    explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
+                                SmallSetVector<SDNode*, 16> &nta)
+      : SelectionDAG::DAGUpdateListener(dtl.getDAG()),
+        DTL(dtl), NodesToAnalyze(nta) {}
+
+    void NodeDeleted(SDNode *N, SDNode *E) override {
+      assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+             N->getNodeId() != DAGTypeLegalizer::Processed &&
+             "Invalid node ID for RAUW deletion!");
+      // It is possible, though rare, for the deleted node N to occur as a
+      // target in a map, so note the replacement N -> E in ReplacedValues.
+      assert(E && "Node not replaced?");
+      DTL.NoteDeletion(N, E);
+
+      // In theory the deleted node could also have been scheduled for
+      // analysis. So remove it from the set of nodes which will be analyzed.
+      NodesToAnalyze.remove(N);
+
+      // In general nothing needs to be done for E, since it didn't change but
+      // only gained new uses. However N -> E was just added to ReplacedValues,
+      // and the result of a ReplacedValues mapping is not allowed to be marked
+      // NewNode. So if E is marked NewNode, then it needs to be analyzed.
+      if (E->getNodeId() == DAGTypeLegalizer::NewNode)
+        NodesToAnalyze.insert(E);
+    }
+
+    void NodeUpdated(SDNode *N) override {
+      // Node updates can mean pretty much anything. It is possible that an
+      // operand was set to something already processed (for example), in
+      // which case this node could become ready. Recompute its flags.
+      assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+             N->getNodeId() != DAGTypeLegalizer::Processed &&
+             "Invalid node ID for RAUW update!");
+      N->setNodeId(DAGTypeLegalizer::NewNode);
+      NodesToAnalyze.insert(N);
+    }
+  };
+}
+
+
+/// The specified value was legalized to the specified other value.
+/// Update the DAG and NodeIds replacing any uses of From to use To instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
+  assert(From.getNode() != To.getNode() && "Potential legalization loop!");
+
+  // If expansion produced new nodes, make sure they are properly marked.
+  AnalyzeNewValue(To);
+
+  // Anything that used the old node should now use the new one. Note that this
+  // can potentially cause recursive merging.
+  SmallSetVector<SDNode*, 16> NodesToAnalyze;
+  NodeUpdateListener NUL(*this, NodesToAnalyze);
+  do {
+
+    // The old node may be present in a map like ExpandedIntegers or
+    // PromotedIntegers. Inform maps about the replacement.
+    auto FromId = getTableId(From);
+    auto ToId = getTableId(To);
+
+    if (FromId != ToId)
+      ReplacedValues[FromId] = ToId;
+    DAG.ReplaceAllUsesOfValueWith(From, To);
+
+    // Process the list of nodes that need to be reanalyzed.
+    while (!NodesToAnalyze.empty()) {
+      SDNode *N = NodesToAnalyze.back();
+      NodesToAnalyze.pop_back();
+      if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+        // The node was analyzed while reanalyzing an earlier node - it is safe
+        // to skip. Note that this is not a morphing node - otherwise it would
+        // still be marked NewNode.
+        continue;
+
+      // Analyze the node's operands and recalculate the node ID.
+      SDNode *M = AnalyzeNewNode(N);
+      if (M != N) {
+        // The node morphed into a different node. Make everyone use the new
+        // node instead.
+        assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+        assert(N->getNumValues() == M->getNumValues() &&
+               "Node morphing changed the number of results!");
+        for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+          SDValue OldVal(N, i);
+          SDValue NewVal(M, i);
+          if (M->getNodeId() == Processed)
+            RemapValue(NewVal);
+          // OldVal may be a target of the ReplacedValues map which was marked
+          // NewNode to force reanalysis because it was updated. Ensure that
+          // anything that ReplacedValues mapped to OldVal will now be mapped
+          // all the way to NewVal.
+          auto OldValId = getTableId(OldVal);
+          auto NewValId = getTableId(NewVal);
+          DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal);
+          if (OldValId != NewValId)
+            ReplacedValues[OldValId] = NewValId;
+        }
+        // The original node continues to exist in the DAG, marked NewNode.
+      }
+    }
+    // When recursively updating nodes with new nodes, it is possible to have
+    // new uses of From due to CSE. If this happens, replace the new uses of
+    // From with To.
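+    // Concretely (an illustrative scenario, not the only one): replacing an
+    // operand of some user U may make U identical to an existing node that
+    // itself uses From; CSE then merges U into that node, resurrecting a use
+    // of From. The enclosing do..while loop repeats the replacement until no
+    // use of From survives.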
+  } while (!From.use_empty());
+}
+
+void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
+  assert(Result.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+         "Invalid type for promoted integer");
+  AnalyzeNewValue(Result);
+
+  auto &OpIdEntry = PromotedIntegers[getTableId(Op)];
+  assert((OpIdEntry == 0) && "Node is already promoted!");
+  OpIdEntry = getTableId(Result);
+  Result->setFlags(Op->getFlags());
+
+  DAG.transferDbgValues(Op, Result);
+}
+
+void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
+  assert(Result.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+         "Invalid type for softened float");
+  AnalyzeNewValue(Result);
+
+  auto &OpIdEntry = SoftenedFloats[getTableId(Op)];
+  assert((OpIdEntry == 0) && "Node is already converted to integer!");
+  OpIdEntry = getTableId(Result);
+}
+
+void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) {
+  assert(Result.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+         "Invalid type for promoted float");
+  AnalyzeNewValue(Result);
+
+  auto &OpIdEntry = PromotedFloats[getTableId(Op)];
+  assert((OpIdEntry == 0) && "Node is already promoted!");
+  OpIdEntry = getTableId(Result);
+}
+
+void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
+  // Note that in some cases vector operation operands may be wider than
+  // the vector element type. For example, BUILD_VECTOR of type <1 x i1> with
+  // a constant i8 operand.
+  assert(Result.getValueSizeInBits() >= Op.getScalarValueSizeInBits() &&
+         "Invalid type for scalarized vector");
+  AnalyzeNewValue(Result);
+
+  auto &OpIdEntry = ScalarizedVectors[getTableId(Op)];
+  assert((OpIdEntry == 0) && "Node is already scalarized!");
+  OpIdEntry = getTableId(Result);
+}
+
+void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
+                                          SDValue &Hi) {
+  std::pair<TableId, TableId> &Entry = ExpandedIntegers[getTableId(Op)];
+  assert((Entry.first != 0) && "Operand isn't expanded");
+  Lo = getSDValue(Entry.first);
+  Hi = getSDValue(Entry.second);
+}
+
+void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
+                                          SDValue Hi) {
+  assert(Lo.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+         Hi.getValueType() == Lo.getValueType() &&
+         "Invalid type for expanded integer");
+  // Lo/Hi may have been newly allocated; if so, add NodeIds as relevant.
+  AnalyzeNewValue(Lo);
+  AnalyzeNewValue(Hi);
+
+  // Transfer debug values. Don't invalidate the source debug value until it's
+  // been transferred to the high and low bits.
+  if (DAG.getDataLayout().isBigEndian()) {
+    DAG.transferDbgValues(Op, Hi, 0, Hi.getValueSizeInBits(), false);
+    DAG.transferDbgValues(Op, Lo, Hi.getValueSizeInBits(),
+                          Lo.getValueSizeInBits());
+  } else {
+    DAG.transferDbgValues(Op, Lo, 0, Lo.getValueSizeInBits(), false);
+    DAG.transferDbgValues(Op, Hi, Lo.getValueSizeInBits(),
+                          Hi.getValueSizeInBits());
+  }
+
+  // Remember that this is the result of the node.
+  std::pair<TableId, TableId> &Entry = ExpandedIntegers[getTableId(Op)];
+  assert((Entry.first == 0) && "Node already expanded");
+  Entry.first = getTableId(Lo);
+  Entry.second = getTableId(Hi);
+}
+
+void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
+                                        SDValue &Hi) {
+  std::pair<TableId, TableId> &Entry = ExpandedFloats[getTableId(Op)];
+  assert((Entry.first != 0) && "Operand isn't expanded");
+  Lo = getSDValue(Entry.first);
+  Hi = getSDValue(Entry.second);
+}
+
+void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
+                                        SDValue Hi) {
+  assert(Lo.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+         Hi.getValueType() == Lo.getValueType() &&
+         "Invalid type for expanded float");
+  // Lo/Hi may have been newly allocated; if so, add NodeIds as relevant.
+  AnalyzeNewValue(Lo);
+  AnalyzeNewValue(Hi);
+
+  std::pair<TableId, TableId> &Entry = ExpandedFloats[getTableId(Op)];
+  assert((Entry.first == 0) && "Node already expanded");
+  Entry.first = getTableId(Lo);
+  Entry.second = getTableId(Hi);
+}
+
+void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
+                                      SDValue &Hi) {
+  std::pair<TableId, TableId> &Entry = SplitVectors[getTableId(Op)];
+  Lo = getSDValue(Entry.first);
+  Hi = getSDValue(Entry.second);
+  assert(Lo.getNode() && "Operand isn't split");
+}
+
+void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
+                                      SDValue Hi) {
+  assert(Lo.getValueType().getVectorElementType() ==
+         Op.getValueType().getVectorElementType() &&
+         2*Lo.getValueType().getVectorNumElements() ==
+         Op.getValueType().getVectorNumElements() &&
+         Hi.getValueType() == Lo.getValueType() &&
+         "Invalid type for split vector");
+  // Lo/Hi may have been newly allocated; if so, add NodeIds as relevant.
+  AnalyzeNewValue(Lo);
+  AnalyzeNewValue(Hi);
+
+  // Remember that this is the result of the node.
+  std::pair<TableId, TableId> &Entry = SplitVectors[getTableId(Op)];
+  assert((Entry.first == 0) && "Node already split");
+  Entry.first = getTableId(Lo);
+  Entry.second = getTableId(Hi);
+}
+
+void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+  assert(Result.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+         "Invalid type for widened vector");
+  AnalyzeNewValue(Result);
+
+  auto &OpIdEntry = WidenedVectors[getTableId(Op)];
+  assert((OpIdEntry == 0) && "Node already widened!");
+  OpIdEntry = getTableId(Result);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Utilities.
+//===----------------------------------------------------------------------===//
+
+/// Convert to an integer of the same size.
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
+  unsigned BitWidth = Op.getValueSizeInBits();
+  return DAG.getNode(ISD::BITCAST, SDLoc(Op),
+                     EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
+}
+
+/// Convert to a vector of integers of the same size.
+SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
+  assert(Op.getValueType().isVector() && "Only applies to vectors!");
+  unsigned EltWidth = Op.getScalarValueSizeInBits();
+  EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
+  auto EltCnt = Op.getValueType().getVectorElementCount();
+  return DAG.getNode(ISD::BITCAST, SDLoc(Op),
+                     EVT::getVectorVT(*DAG.getContext(), EltNVT, EltCnt), Op);
+}
+
+SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
+                                               EVT DestVT) {
+  SDLoc dl(Op);
+  // Create the stack frame object. Make sure it is aligned for both
+  // the source and destination types.
+  SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
+  // Emit a store to the stack slot.
+  SDValue Store =
+      DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, MachinePointerInfo());
+  // Result is a load from the stack slot.
+  return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo());
+}
+
+/// Replace the node's results with custom code provided by the target and
+/// return "true", or do nothing and return "false".
+/// If LegalizeResult is true, the node has illegal result types and VT is the
+/// type of the illegal result ResNo. If LegalizeResult is false, the node has
+/// legal result types but an illegal operand, and VT is the type of the
+/// illegal operand OperandNo.
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
+  // See if the target wants to custom lower this node.
+  if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+    return false;
+
+  SmallVector<SDValue, 8> Results;
+  if (LegalizeResult)
+    TLI.ReplaceNodeResults(N, Results, DAG);
+  else
+    TLI.LowerOperationWrapper(N, Results, DAG);
+
+  if (Results.empty())
+    // The target didn't want to custom lower it after all.
+    return false;
+
+  // When called from DAGTypeLegalizer::ExpandIntegerResult, we might need to
+  // provide the same kind of custom splitting behavior.
+  if (Results.size() == N->getNumValues() + 1 && LegalizeResult) {
+    // We've legalized a return type by splitting it. If there is a chain,
+    // replace that too.
+    SetExpandedInteger(SDValue(N, 0), Results[0], Results[1]);
+    if (N->getNumValues() > 1)
+      ReplaceValueWith(SDValue(N, 1), Results[2]);
+    return true;
+  }
+
+  // Make everything that once used N's values now use those in Results instead.
+  assert(Results.size() == N->getNumValues() &&
+         "Custom lowering returned the wrong number of results!");
+  for (unsigned i = 0, e = Results.size(); i != e; ++i) {
+    ReplaceValueWith(SDValue(N, i), Results[i]);
+  }
+  return true;
+}
+
+
+/// Widen the node's results with custom code provided by the target and return
+/// "true", or do nothing and return "false".
+bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
+  // See if the target wants to custom lower this node.
+  if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+    return false;
+
+  SmallVector<SDValue, 8> Results;
+  TLI.ReplaceNodeResults(N, Results, DAG);
+
+  if (Results.empty())
+    // The target didn't want to custom widen lower its result after all.
+    return false;
+
+  // Update the widening map.
+  assert(Results.size() == N->getNumValues() &&
+         "Custom lowering returned the wrong number of results!");
+  for (unsigned i = 0, e = Results.size(); i != e; ++i) {
+    // If this is a chain output just replace it.
+    if (Results[i].getValueType() == MVT::Other)
+      ReplaceValueWith(SDValue(N, i), Results[i]);
+    else
+      SetWidenedVector(SDValue(N, i), Results[i]);
+  }
+  return true;
+}
+
+SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) {
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+    if (i != ResNo)
+      ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+  return SDValue(N->getOperand(ResNo));
+}
+
+/// Use ISD::EXTRACT_ELEMENT nodes to extract the low and high parts of the
+/// given value.
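+/// For example (a sketch, assuming Pair is an i64 whose transformed type is
+/// i32), this produces:
+///   Lo = extract_element i64 Pair, 0   // lower half
+///   Hi = extract_element i64 Pair, 1   // upper half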
+void DAGTypeLegalizer::GetPairElements(SDValue Pair, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(Pair); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType()); + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, + DAG.getIntPtrConstant(0, dl)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, + DAG.getIntPtrConstant(1, dl)); +} + +/// Build an integer with low bits Lo and high bits Hi. +SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { + // Arbitrarily use dlHi for result SDLoc + SDLoc dlHi(Hi); + SDLoc dlLo(Lo); + EVT LVT = Lo.getValueType(); + EVT HVT = Hi.getValueType(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + LVT.getSizeInBits() + HVT.getSizeInBits()); + + EVT ShiftAmtVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout(), false); + Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); + Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); + Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, + DAG.getConstant(LVT.getSizeInBits(), dlHi, ShiftAmtVT)); + return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); +} + +/// Convert the node into a libcall with the same prototype. +SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, + bool isSigned) { + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); + unsigned NumOps = N->getNumOperands(); + SDLoc dl(N); + if (NumOps == 0) { + return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, CallOptions, + dl).first; + } else if (NumOps == 1) { + SDValue Op = N->getOperand(0); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, CallOptions, + dl).first; + } else if (NumOps == 2) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, + dl).first; + } + SmallVector<SDValue, 8> Ops(NumOps); + for (unsigned i = 0; i < NumOps; ++i) + Ops[i] = N->getOperand(i); + + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first; +} + +/// Expand a node into a call to a libcall. Similar to ExpandLibCall except that +/// the first operand is the in-chain. +std::pair<SDValue, SDValue> +DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, + bool isSigned) { + SDValue InChain = Node->getOperand(0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); + Entry.Ty = ArgTy; + Entry.IsSExt = isSigned; + Entry.IsZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy(DAG.getDataLayout())); + + Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(SDLoc(Node)) + .setChain(InChain) + .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, + std::move(Args)) + .setSExtResult(isSigned) + .setZExtResult(!isSigned); + + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + return CallInfo; +} + +/// Promote the given target boolean to a target boolean of the given type. +/// A target boolean is an integer value, not necessarily of type i1, the bits +/// of which conform to getBooleanContents. +/// +/// ValVT is the type of values that produced the boolean. 
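+///
+/// For example (a sketch): if TLI.getBooleanContents(ValVT) is
+/// ZeroOrNegativeOneBooleanContent, the boolean is sign-extended to the
+/// setcc result type for ValVT; for ZeroOrOneBooleanContent it is
+/// zero-extended (see TargetLowering::getExtendForContent).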
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
+  SDLoc dl(Bool);
+  EVT BoolVT = getSetCCResultType(ValVT);
+  ISD::NodeType ExtendCode =
+      TargetLowering::getExtendForContent(TLI.getBooleanContents(ValVT));
+  return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
+}
+
+/// Return the lower LoVT bits of Op in Lo and the upper HiVT bits in Hi.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+                                    EVT LoVT, EVT HiVT,
+                                    SDValue &Lo, SDValue &Hi) {
+  SDLoc dl(Op);
+  assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
+         Op.getValueSizeInBits() && "Invalid integer splitting!");
+  Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
+  unsigned ReqShiftAmountInBits =
+      Log2_32_Ceil(Op.getValueType().getSizeInBits());
+  MVT ShiftAmountTy =
+      TLI.getScalarShiftAmountTy(DAG.getDataLayout(), Op.getValueType());
+  if (ReqShiftAmountInBits > ShiftAmountTy.getSizeInBits())
+    ShiftAmountTy = MVT::getIntegerVT(NextPowerOf2(ReqShiftAmountInBits));
+  Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
+                   DAG.getConstant(LoVT.getSizeInBits(), dl, ShiftAmountTy));
+  Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
+}
+
+/// Return the lower and upper halves of Op's bits in a value type half the
+/// size of Op's type.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+                                    SDValue &Lo, SDValue &Hi) {
+  EVT HalfVT =
+      EVT::getIntegerVT(*DAG.getContext(), Op.getValueSizeInBits() / 2);
+  SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Entry Point
+//===----------------------------------------------------------------------===//
+
+/// This transforms the SelectionDAG into a SelectionDAG that only uses types
+/// natively supported by the target. Returns "true" if it made any changes.
+///
+/// Note that this is an involved process that may invalidate pointers into
+/// the graph.
+bool SelectionDAG::LegalizeTypes() {
+  return DAGTypeLegalizer(*this).run();
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
new file mode 100644
index 0000000000000..4afbae69128a1
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -0,0 +1,962 @@
+//===-- LegalizeTypes.h - DAG Type Legalizer class definition ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DAGTypeLegalizer class. This is a private interface
+// shared among the files that implement the SelectionDAG::LegalizeTypes
+// method.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// This takes an arbitrary SelectionDAG as input and hacks on it until only
+/// value types the target machine can handle are left. This involves promoting
+/// small sizes to large sizes or splitting up large values into small values.
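+/// For example, on a target whose widest legal integer type is i32, an i16
+/// add is promoted to an i32 add, while an i64 add is expanded into an i32
+/// add plus an i32 add-with-carry.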
+///
+class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
+  const TargetLowering &TLI;
+  SelectionDAG &DAG;
+public:
+  /// This pass uses the NodeId on the SDNodes to hold information about the
+  /// state of the node. The NodeIdFlags enum lists the possible states.
+  enum NodeIdFlags {
+    /// All operands have been processed, so this node is ready to be handled.
+    ReadyToProcess = 0,
+
+    /// This is a new node, not before seen, that was created in the process of
+    /// legalizing some other node.
+    NewNode = -1,
+
+    /// This node's ID needs to be set to the number of its unprocessed
+    /// operands.
+    Unanalyzed = -2,
+
+    /// This is a node that has already been processed.
+    Processed = -3
+
+    // 1+ - This is a node which has this many unprocessed operands.
+  };
+private:
+
+  /// This is a bitvector that contains two bits for each simple value type,
+  /// where the two bits correspond to the LegalizeAction enum from
+  /// TargetLowering. This can be queried with "getTypeAction(VT)".
+  TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+  /// Return how we should legalize values of this type.
+  TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const {
+    return TLI.getTypeAction(*DAG.getContext(), VT);
+  }
+
+  /// Return true if this type is legal on this target.
+  bool isTypeLegal(EVT VT) const {
+    return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
+  }
+
+  /// Return true if this is a simple legal type.
+  bool isSimpleLegalType(EVT VT) const {
+    return VT.isSimple() && TLI.isTypeLegal(VT);
+  }
+
+  EVT getSetCCResultType(EVT VT) const {
+    return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  }
+
+  /// Pretend all of this node's results are legal.
+  bool IgnoreNodeResults(SDNode *N) const {
+    return N->getOpcode() == ISD::TargetConstant ||
+           N->getOpcode() == ISD::Register;
+  }
+
+  // Bijection from SDValue to unique id. As each created node gets a
+  // new id we do not need to worry about expunging entries when nodes
+  // are reused. Should we run out of ids, we can do a one time expensive
+  // compactification.
+  typedef unsigned TableId;
+
+  TableId NextValueId = 1;
+
+  SmallDenseMap<SDValue, TableId, 8> ValueToIdMap;
+  SmallDenseMap<TableId, SDValue, 8> IdToValueMap;
+
+  /// For integer nodes that are below legal width, this map indicates what
+  /// promoted value to use.
+  SmallDenseMap<TableId, TableId, 8> PromotedIntegers;
+
+  /// For integer nodes that need to be expanded this map indicates the (Lo,
+  /// Hi) pair of values making up the expanded version of the input.
+  SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedIntegers;
+
+  /// For floating-point nodes converted to integers of the same size, this map
+  /// indicates the converted value to use.
+  SmallDenseMap<TableId, TableId, 8> SoftenedFloats;
+
+  /// For floating-point nodes that have a smaller precision than the smallest
+  /// supported precision, this map indicates what promoted value to use.
+  SmallDenseMap<TableId, TableId, 8> PromotedFloats;
+
+  /// For float nodes that need to be expanded this map indicates the (Lo, Hi)
+  /// pair of values making up the expanded version of the input.
+  SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedFloats;
+
+  /// For nodes that are <1 x ty>, this map indicates the scalar value of type
+  /// 'ty' to use.
+  SmallDenseMap<TableId, TableId, 8> ScalarizedVectors;
+
+  /// For vector nodes that need to be split, this map indicates the (Lo, Hi)
+  /// pair of vectors making up the split version of the input.
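+  /// For example, the id of a v8i32 value maps to the ids of the two v4i32
+  /// halves that make it up.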
+  SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> SplitVectors;
+
+  /// For vector nodes that need to be widened, indicates the widened value to
+  /// use.
+  SmallDenseMap<TableId, TableId, 8> WidenedVectors;
+
+  /// For values that have been replaced with another, indicates the replacement
+  /// value to use.
+  SmallDenseMap<TableId, TableId, 8> ReplacedValues;
+
+  /// This defines a worklist of nodes to process. In order to be pushed onto
+  /// this worklist, all operands of a node must have already been processed.
+  SmallVector<SDNode*, 128> Worklist;
+
+  TableId getTableId(SDValue V) {
+    assert(V.getNode() && "Getting TableId on SDValue()");
+
+    auto I = ValueToIdMap.find(V);
+    if (I != ValueToIdMap.end()) {
+      // Remap the id in case the value has since been replaced.
+      RemapId(I->second);
+      assert(I->second && "All Ids should be nonzero");
+      return I->second;
+    }
+    // Add if it's not there.
+    ValueToIdMap.insert(std::make_pair(V, NextValueId));
+    IdToValueMap.insert(std::make_pair(NextValueId, V));
+    ++NextValueId;
+    assert(NextValueId != 0 &&
+           "Ran out of Ids. Increase id type size or add compactification");
+    return NextValueId - 1;
+  }
+
+  const SDValue &getSDValue(TableId &Id) {
+    RemapId(Id);
+    assert(Id && "TableId should be non-zero");
+    return IdToValueMap[Id];
+  }
+
+public:
+  explicit DAGTypeLegalizer(SelectionDAG &dag)
+    : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+      ValueTypeActions(TLI.getValueTypeActions()) {
+    static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
+                  "Too many value types for ValueTypeActions to hold!");
+  }
+
+  /// This is the main entry point for the type legalizer. This does a
+  /// top-down traversal of the dag, legalizing types as it goes. Returns
+  /// "true" if it made any changes.
+  bool run();
+
+  void NoteDeletion(SDNode *Old, SDNode *New) {
+    for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
+      TableId NewId = getTableId(SDValue(New, i));
+      TableId OldId = getTableId(SDValue(Old, i));
+
+      if (OldId != NewId)
+        ReplacedValues[OldId] = NewId;
+
+      // Delete Node from tables.
+      ValueToIdMap.erase(SDValue(Old, i));
+      IdToValueMap.erase(OldId);
+      PromotedIntegers.erase(OldId);
+      ExpandedIntegers.erase(OldId);
+      SoftenedFloats.erase(OldId);
+      PromotedFloats.erase(OldId);
+      ExpandedFloats.erase(OldId);
+      ScalarizedVectors.erase(OldId);
+      SplitVectors.erase(OldId);
+      WidenedVectors.erase(OldId);
+    }
+  }
+
+  SelectionDAG &getDAG() const { return DAG; }
+
+private:
+  SDNode *AnalyzeNewNode(SDNode *N);
+  void AnalyzeNewValue(SDValue &Val);
+  void PerformExpensiveChecks();
+  void RemapId(TableId &Id);
+  void RemapValue(SDValue &V);
+
+  // Common routines.
+  SDValue BitConvertToInteger(SDValue Op);
+  SDValue BitConvertVectorToIntegerVector(SDValue Op);
+  SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
+  bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
+  bool CustomWidenLowerNode(SDNode *N, EVT VT);
+
+  /// Replace each result of the given MERGE_VALUES node with the corresponding
+  /// input operand, except for the result 'ResNo', for which the corresponding
+  /// input operand is returned.
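+  /// For example (a sketch), given
+  ///   t2, t3 = merge_values t0, t1
+  /// calling this with ResNo == 1 replaces all uses of t2 with t0 and
+  /// returns t1.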
+ SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo); + + SDValue JoinIntegers(SDValue Lo, SDValue Hi); + SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); + + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, bool isSigned); + std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); + + SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT); + + void ReplaceValueWith(SDValue From, SDValue To); + void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, + SDValue &Lo, SDValue &Hi); + + void AddToWorklist(SDNode *N) { + N->setNodeId(ReadyToProcess); + Worklist.push_back(N); + } + + //===--------------------------------------------------------------------===// + // Integer Promotion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// Given a processed operand Op which was promoted to a larger integer type, + /// this returns the promoted value. The low bits of the promoted value + /// corresponding to the original type are exactly equal to Op. + /// The extra bits contain rubbish, so the promoted value may need to be zero- + /// or sign-extended from the original type before it is usable (the helpers + /// SExtPromotedInteger and ZExtPromotedInteger can do this for you). + /// For example, if Op is an i16 and was promoted to an i32, then this method + /// returns an i32, the lower 16 bits of which coincide with Op, and the upper + /// 16 bits of which contain rubbish. + SDValue GetPromotedInteger(SDValue Op) { + TableId &PromotedId = PromotedIntegers[getTableId(Op)]; + SDValue PromotedOp = getSDValue(PromotedId); + assert(PromotedOp.getNode() && "Operand wasn't promoted?"); + return PromotedOp; + } + void SetPromotedInteger(SDValue Op, SDValue Result); + + /// Get a promoted operand and sign extend it to the final size. + SDValue SExtPromotedInteger(SDValue Op) { + EVT OldVT = Op.getValueType(); + SDLoc dl(Op); + Op = GetPromotedInteger(Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, + DAG.getValueType(OldVT)); + } + + /// Get a promoted operand and zero extend it to the final size. + SDValue ZExtPromotedInteger(SDValue Op) { + EVT OldVT = Op.getValueType(); + SDLoc dl(Op); + Op = GetPromotedInteger(Op); + return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType()); + } + + // Get a promoted operand and sign or zero extend it to the final size + // (depending on TargetLoweringInfo::isSExtCheaperThanZExt). For a given + // subtarget and type, the choice of sign or zero-extension will be + // consistent. + SDValue SExtOrZExtPromotedInteger(SDValue Op) { + EVT OldVT = Op.getValueType(); + SDLoc DL(Op); + Op = GetPromotedInteger(Op); + if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType())) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op, + DAG.getValueType(OldVT)); + return DAG.getZeroExtendInReg(Op, DL, OldVT.getScalarType()); + } + + // Integer Result Promotion. 
+ void PromoteIntegerResult(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_AssertSext(SDNode *N); + SDValue PromoteIntRes_AssertZext(SDNode *N); + SDValue PromoteIntRes_Atomic0(AtomicSDNode *N); + SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); + SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo); + SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); + SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); + SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N); + SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N); + SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N); + SDValue PromoteIntRes_BITCAST(SDNode *N); + SDValue PromoteIntRes_BSWAP(SDNode *N); + SDValue PromoteIntRes_BITREVERSE(SDNode *N); + SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); + SDValue PromoteIntRes_Constant(SDNode *N); + SDValue PromoteIntRes_CTLZ(SDNode *N); + SDValue PromoteIntRes_CTPOP(SDNode *N); + SDValue PromoteIntRes_CTTZ(SDNode *N); + SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); + SDValue PromoteIntRes_FP_TO_FP16(SDNode *N); + SDValue PromoteIntRes_INT_EXTEND(SDNode *N); + SDValue PromoteIntRes_LOAD(LoadSDNode *N); + SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N); + SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N); + SDValue PromoteIntRes_Overflow(SDNode *N); + SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_SELECT(SDNode *N); + SDValue PromoteIntRes_VSELECT(SDNode *N); + SDValue PromoteIntRes_SELECT_CC(SDNode *N); + SDValue PromoteIntRes_SETCC(SDNode *N); + SDValue PromoteIntRes_SHL(SDNode *N); + SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); + SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N); + SDValue PromoteIntRes_SExtIntBinOp(SDNode *N); + SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue PromoteIntRes_SRA(SDNode *N); + SDValue PromoteIntRes_SRL(SDNode *N); + SDValue PromoteIntRes_TRUNCATE(SDNode *N); + SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_UNDEF(SDNode *N); + SDValue PromoteIntRes_VAARG(SDNode *N); + SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_ADDSUBSAT(SDNode *N); + SDValue PromoteIntRes_MULFIX(SDNode *N); + SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N); + SDValue PromoteIntRes_VECREDUCE(SDNode *N); + SDValue PromoteIntRes_ABS(SDNode *N); + + // Integer Operand Promotion. 
+ bool PromoteIntegerOperand(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); + SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N); + SDValue PromoteIntOp_BITCAST(SDNode *N); + SDValue PromoteIntOp_BUILD_PAIR(SDNode *N); + SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); + SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); + SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); + SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N); + SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_Shift(SDNode *N); + SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); + SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_TRUNCATE(SDNode *N); + SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); + SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N); + SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MULFIX(SDNode *N); + SDValue PromoteIntOp_FPOWI(SDNode *N); + SDValue PromoteIntOp_VECREDUCE(SDNode *N); + + void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); + + //===--------------------------------------------------------------------===// + // Integer Expansion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// Given a processed operand Op which was expanded into two integers of half + /// the size, this returns the two halves. The low bits of Op are exactly + /// equal to the bits of Lo; the high bits exactly equal Hi. + /// For example, if Op is an i64 which was expanded into two i32's, then this + /// method returns the two i32's, with Lo being equal to the lower 32 bits of + /// Op, and Hi being equal to the upper 32 bits. + void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi); + + // Integer Result Expansion. 
+ void ExpandIntegerResult(SDNode *N, unsigned ResNo); + void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_LLROUND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandShiftByConstant(SDNode *N, const APInt &Amt, + SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Integer Operand Expansion. 
+ bool ExpandIntegerOperand(SDNode *N, unsigned OpNo); + SDValue ExpandIntOp_BR_CC(SDNode *N); + SDValue ExpandIntOp_SELECT_CC(SDNode *N); + SDValue ExpandIntOp_SETCC(SDNode *N); + SDValue ExpandIntOp_SETCCCARRY(SDNode *N); + SDValue ExpandIntOp_Shift(SDNode *N); + SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue ExpandIntOp_TRUNCATE(SDNode *N); + SDValue ExpandIntOp_UINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_RETURNADDR(SDNode *N); + SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N); + + void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, const SDLoc &dl); + + //===--------------------------------------------------------------------===// + // Float to Integer Conversion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSoftenedFloat - Given a processed operand Op which was converted to an + /// integer of the same size, this returns the integer. The integer contains + /// exactly the same bits as Op - only the type changed. For example, if Op + /// is an f32 which was softened to an i32, then this method returns an i32, + /// the bits of which coincide with those of Op + SDValue GetSoftenedFloat(SDValue Op) { + TableId Id = getTableId(Op); + auto Iter = SoftenedFloats.find(Id); + if (Iter == SoftenedFloats.end()) { + assert(isSimpleLegalType(Op.getValueType()) && + "Operand wasn't converted to integer?"); + return Op; + } + SDValue SoftenedOp = getSDValue(Iter->second); + assert(SoftenedOp.getNode() && "Unconverted op in SoftenedFloats?"); + return SoftenedOp; + } + void SetSoftenedFloat(SDValue Op, SDValue Result); + + // Convert Float Results to Integer. + void SoftenFloatResult(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_BITCAST(SDNode *N); + SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); + SDValue SoftenFloatRes_ConstantFP(SDNode *N); + SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FMINNUM(SDNode *N); + SDValue SoftenFloatRes_FMAXNUM(SDNode *N); + SDValue SoftenFloatRes_FADD(SDNode *N); + SDValue SoftenFloatRes_FCEIL(SDNode *N); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); + SDValue SoftenFloatRes_FCOS(SDNode *N); + SDValue SoftenFloatRes_FDIV(SDNode *N); + SDValue SoftenFloatRes_FEXP(SDNode *N); + SDValue SoftenFloatRes_FEXP2(SDNode *N); + SDValue SoftenFloatRes_FFLOOR(SDNode *N); + SDValue SoftenFloatRes_FLOG(SDNode *N); + SDValue SoftenFloatRes_FLOG2(SDNode *N); + SDValue SoftenFloatRes_FLOG10(SDNode *N); + SDValue SoftenFloatRes_FMA(SDNode *N); + SDValue SoftenFloatRes_FMUL(SDNode *N); + SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); + SDValue SoftenFloatRes_FNEG(SDNode *N); + SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); + SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N); + SDValue SoftenFloatRes_FP_ROUND(SDNode *N); + SDValue SoftenFloatRes_FPOW(SDNode *N); + SDValue SoftenFloatRes_FPOWI(SDNode *N); + SDValue SoftenFloatRes_FREM(SDNode *N); + SDValue SoftenFloatRes_FRINT(SDNode *N); + SDValue SoftenFloatRes_FROUND(SDNode *N); + SDValue SoftenFloatRes_FSIN(SDNode *N); + SDValue SoftenFloatRes_FSQRT(SDNode *N); + SDValue SoftenFloatRes_FSUB(SDNode *N); + SDValue SoftenFloatRes_FTRUNC(SDNode *N); + SDValue SoftenFloatRes_LOAD(SDNode *N); + SDValue SoftenFloatRes_SELECT(SDNode *N); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N); + SDValue 
SoftenFloatRes_UNDEF(SDNode *N); + SDValue SoftenFloatRes_VAARG(SDNode *N); + SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); + + // Convert Float Operand to Integer. + bool SoftenFloatOperand(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_BITCAST(SDNode *N); + SDValue SoftenFloatOp_BR_CC(SDNode *N); + SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); + SDValue SoftenFloatOp_FP_ROUND(SDNode *N); + SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); + SDValue SoftenFloatOp_LROUND(SDNode *N); + SDValue SoftenFloatOp_LLROUND(SDNode *N); + SDValue SoftenFloatOp_LRINT(SDNode *N); + SDValue SoftenFloatOp_LLRINT(SDNode *N); + SDValue SoftenFloatOp_SELECT_CC(SDNode *N); + SDValue SoftenFloatOp_SETCC(SDNode *N); + SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); + + //===--------------------------------------------------------------------===// + // Float Expansion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// Given a processed operand Op which was expanded into two floating-point + /// values of half the size, this returns the two halves. + /// The low bits of Op are exactly equal to the bits of Lo; the high bits + /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded + /// into two f64's, then this method returns the two f64's, with Lo being + /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits. + void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi); + + // Float Result Expansion. + void ExpandFloatResult(SDNode *N, unsigned ResNo); + void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTRUNC (SDNode *N, SDValue 
&Lo, SDValue &Hi); + void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Float Operand Expansion. + bool ExpandFloatOperand(SDNode *N, unsigned OpNo); + SDValue ExpandFloatOp_BR_CC(SDNode *N); + SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N); + SDValue ExpandFloatOp_FP_ROUND(SDNode *N); + SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); + SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); + SDValue ExpandFloatOp_LROUND(SDNode *N); + SDValue ExpandFloatOp_LLROUND(SDNode *N); + SDValue ExpandFloatOp_LRINT(SDNode *N); + SDValue ExpandFloatOp_LLRINT(SDNode *N); + SDValue ExpandFloatOp_SELECT_CC(SDNode *N); + SDValue ExpandFloatOp_SETCC(SDNode *N); + SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo); + + void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, const SDLoc &dl); + + //===--------------------------------------------------------------------===// + // Float promotion support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + SDValue GetPromotedFloat(SDValue Op) { + TableId &PromotedId = PromotedFloats[getTableId(Op)]; + SDValue PromotedOp = getSDValue(PromotedId); + assert(PromotedOp.getNode() && "Operand wasn't promoted?"); + return PromotedOp; + } + void SetPromotedFloat(SDValue Op, SDValue Result); + + void PromoteFloatResult(SDNode *N, unsigned ResNo); + SDValue PromoteFloatRes_BITCAST(SDNode *N); + SDValue PromoteFloatRes_BinOp(SDNode *N); + SDValue PromoteFloatRes_ConstantFP(SDNode *N); + SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N); + SDValue PromoteFloatRes_FMAD(SDNode *N); + SDValue PromoteFloatRes_FPOWI(SDNode *N); + SDValue PromoteFloatRes_FP_ROUND(SDNode *N); + SDValue PromoteFloatRes_LOAD(SDNode *N); + SDValue PromoteFloatRes_SELECT(SDNode *N); + SDValue PromoteFloatRes_SELECT_CC(SDNode *N); + SDValue PromoteFloatRes_UnaryOp(SDNode *N); + SDValue PromoteFloatRes_UNDEF(SDNode *N); + SDValue BitcastToInt_ATOMIC_SWAP(SDNode *N); + SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N); + + bool PromoteFloatOperand(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo); + + //===--------------------------------------------------------------------===// + // Scalarization Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// Given a processed one-element vector Op which was scalarized to its + /// element type, this returns the element. For example, if Op is a v1i32, + /// Op = < i32 val >, this method returns val, an i32. + SDValue GetScalarizedVector(SDValue Op) { + TableId &ScalarizedId = ScalarizedVectors[getTableId(Op)]; + SDValue ScalarizedOp = getSDValue(ScalarizedId); + assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?"); + return ScalarizedOp; + } + void SetScalarizedVector(SDValue Op, SDValue Result); + + // Vector Result Scalarization: <1 x ty> -> ty. 
+ void ScalarizeVectorResult(SDNode *N, unsigned ResNo); + SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo); + SDValue ScalarizeVecRes_BinOp(SDNode *N); + SDValue ScalarizeVecRes_TernaryOp(SDNode *N); + SDValue ScalarizeVecRes_UnaryOp(SDNode *N); + SDValue ScalarizeVecRes_StrictFPOp(SDNode *N); + SDValue ScalarizeVecRes_OverflowOp(SDNode *N, unsigned ResNo); + SDValue ScalarizeVecRes_InregOp(SDNode *N); + SDValue ScalarizeVecRes_VecInregOp(SDNode *N); + + SDValue ScalarizeVecRes_BITCAST(SDNode *N); + SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N); + SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); + SDValue ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N); + SDValue ScalarizeVecRes_FPOWI(SDNode *N); + SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue ScalarizeVecRes_VSELECT(SDNode *N); + SDValue ScalarizeVecRes_SELECT(SDNode *N); + SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); + SDValue ScalarizeVecRes_SETCC(SDNode *N); + SDValue ScalarizeVecRes_UNDEF(SDNode *N); + SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); + + SDValue ScalarizeVecRes_MULFIX(SDNode *N); + + // Vector Operand Scalarization: <1 x ty> -> ty. + bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_BITCAST(SDNode *N); + SDValue ScalarizeVecOp_UnaryOp(SDNode *N); + SDValue ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N); + SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); + SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecOp_VSELECT(SDNode *N); + SDValue ScalarizeVecOp_VSETCC(SDNode *N); + SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_VECREDUCE(SDNode *N); + + //===--------------------------------------------------------------------===// + // Vector Splitting Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// Given a processed vector Op which was split into vectors of half the size, + /// this method returns the halves. The first elements of Op coincide with the + /// elements of Lo; the remaining elements of Op coincide with the elements of + /// Hi: Op is what you would get by concatenating Lo and Hi. + /// For example, if Op is a v8i32 that was split into two v4i32's, then this + /// method returns the two v4i32's, with Lo corresponding to the first 4 + /// elements of Op, and Hi to the last 4 elements. + void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi); + + // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. 
+ void SplitVectorResult(SDNode *N, unsigned ResNo); + void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi); + + void SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi); + + void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); + void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi); + void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, + SDValue &Hi); + void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. + bool SplitVectorOperand(SDNode *N, unsigned OpNo); + SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo); + SDValue SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo); + SDValue SplitVecOp_UnaryOp(SDNode *N); + SDValue SplitVecOp_TruncateHelper(SDNode *N); + + SDValue SplitVecOp_BITCAST(SDNode *N); + SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); + SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue SplitVecOp_ExtVecInRegOp(SDNode *N); + SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); + SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); + SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, unsigned OpNo); + SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); + SDValue SplitVecOp_VSETCC(SDNode *N); + SDValue SplitVecOp_FP_ROUND(SDNode *N); + SDValue SplitVecOp_FCOPYSIGN(SDNode *N); + + //===--------------------------------------------------------------------===// + // Vector Widening Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// Given a processed vector Op which was widened into a larger vector, this + /// method returns the larger vector. The elements of the returned vector + /// consist of the elements of Op followed by elements containing rubbish. + /// For example, if Op is a v2i32 that was widened to a v4i32, then this + /// method returns a v4i32 for which the first two elements are the same as + /// those of Op, while the last two elements contain rubbish. 
+ SDValue GetWidenedVector(SDValue Op) { + TableId &WidenedId = WidenedVectors[getTableId(Op)]; + SDValue WidenedOp = getSDValue(WidenedId); + assert(WidenedOp.getNode() && "Operand wasn't widened?"); + return WidenedOp; + } + void SetWidenedVector(SDValue Op, SDValue Result); + + // Widen Vector Result Promotion. + void WidenVectorResult(SDNode *N, unsigned ResNo); + SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo); + SDValue WidenVecRes_BITCAST(SDNode* N); + SDValue WidenVecRes_BUILD_VECTOR(SDNode* N); + SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N); + SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N); + SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); + SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_LOAD(SDNode* N); + SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); + SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N); + SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N); + SDValue WidenVecRes_SELECT(SDNode* N); + SDValue WidenVSELECTAndMask(SDNode *N); + SDValue WidenVecRes_SELECT_CC(SDNode* N); + SDValue WidenVecRes_SETCC(SDNode* N); + SDValue WidenVecRes_UNDEF(SDNode *N); + SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); + + SDValue WidenVecRes_Ternary(SDNode *N); + SDValue WidenVecRes_Binary(SDNode *N); + SDValue WidenVecRes_BinaryCanTrap(SDNode *N); + SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N); + SDValue WidenVecRes_StrictFP(SDNode *N); + SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo); + SDValue WidenVecRes_Convert(SDNode *N); + SDValue WidenVecRes_Convert_StrictFP(SDNode *N); + SDValue WidenVecRes_FCOPYSIGN(SDNode *N); + SDValue WidenVecRes_POWI(SDNode *N); + SDValue WidenVecRes_Shift(SDNode *N); + SDValue WidenVecRes_Unary(SDNode *N); + SDValue WidenVecRes_InregOp(SDNode *N); + + // Widen Vector Operand. + bool WidenVectorOperand(SDNode *N, unsigned OpNo); + SDValue WidenVecOp_BITCAST(SDNode *N); + SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); + SDValue WidenVecOp_EXTEND(SDNode *N); + SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); + SDValue WidenVecOp_STORE(SDNode* N); + SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); + SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo); + SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); + SDValue WidenVecOp_SETCC(SDNode* N); + SDValue WidenVecOp_VSELECT(SDNode *N); + + SDValue WidenVecOp_Convert(SDNode *N); + SDValue WidenVecOp_FCOPYSIGN(SDNode *N); + SDValue WidenVecOp_VECREDUCE(SDNode *N); + + /// Helper function to generate a set of operations to perform + /// a vector operation for a wider type. + /// + SDValue UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE); + + //===--------------------------------------------------------------------===// + // Vector Widening Utilities Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// Helper function to generate a set of loads to load a vector with a + /// resulting wider type. It takes: + /// LdChain: list of chains for the load to be generated. + /// Ld: load to widen + SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, + LoadSDNode *LD); + + /// Helper function to generate a set of extension loads to load a vector with + /// a resulting wider type. It takes: + /// LdChain: list of chains for the load to be generated. 
+  /// Ld: load to widen
+  /// ExtType: extension element type
+  SDValue GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
+                                 LoadSDNode *LD, ISD::LoadExtType ExtType);
+
+  /// Helper function to generate a set of stores to store a widened vector
+  /// into non-widened memory.
+  /// StChain: list of chains for the stores we have generated
+  /// ST: store of a widened value
+  void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
+
+  /// Helper function to generate a set of stores to store a truncated widened
+  /// vector into non-widened memory.
+  /// StChain: list of chains for the stores we have generated
+  /// ST: store of a widened value
+  void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
+                                 StoreSDNode *ST);
+
+  /// Modifies a vector input (widens or narrows) to a vector of NVT. The
+  /// input vector must have the same element type as NVT.
+  /// When FillWithZeroes is "on" the vector will be widened with zeroes.
+  /// By default, the vector will be widened with undefined values.
+  SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false);
+
+  /// Return a mask of vector type MaskVT to replace InMask. Also adjust
+  /// MaskVT to ToMaskVT if needed with vector extension or truncation.
+  SDValue convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT);
+
+  //===--------------------------------------------------------------------===//
+  // Generic Splitting: LegalizeTypesGeneric.cpp
+  //===--------------------------------------------------------------------===//
+
+  // Legalization methods which only use the fact that the illegal type is
+  // split into two not necessarily identical types. As such they can be used
+  // for splitting vectors and expanding integers and floats.
+
+  void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+    if (Op.getValueType().isVector())
+      GetSplitVector(Op, Lo, Hi);
+    else if (Op.getValueType().isInteger())
+      GetExpandedInteger(Op, Lo, Hi);
+    else
+      GetExpandedFloat(Op, Lo, Hi);
+  }
+
+  /// Use ISD::EXTRACT_ELEMENT nodes to extract the low and high parts of the
+  /// given value.
+  void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+  // Generic Result Splitting.
+  void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+                             SDValue &Lo, SDValue &Hi);
+  void SplitRes_SELECT    (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_UNDEF     (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+  void SplitVSETCC(const SDNode *N);
+
+  //===--------------------------------------------------------------------===//
+  // Generic Expansion: LegalizeTypesGeneric.cpp
+  //===--------------------------------------------------------------------===//
+
+  // Legalization methods which only use the fact that the illegal type is
+  // split into two identical types of half the size, and that the Lo/Hi part
+  // is stored first in memory on little/big-endian machines, followed by the
+  // Hi/Lo part. As such they can be used for expanding integers and floats.
+
+  void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+    if (Op.getValueType().isInteger())
+      GetExpandedInteger(Op, Lo, Hi);
+    else
+      GetExpandedFloat(Op, Lo, Hi);
+  }
+
+  /// This function will split the integer \p Op into \p NumElements
+  /// operations of type \p EltVT and store them in \p Ops.
+  void IntegerToVector(SDValue Op, unsigned NumElements,
+                       SmallVectorImpl<SDValue> &Ops, EVT EltVT);
+
+  // Generic Result Expansion.
+ void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi); + void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi); + + // Generic Operand Expansion. + SDValue ExpandOp_BITCAST (SDNode *N); + SDValue ExpandOp_BUILD_VECTOR (SDNode *N); + SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N); + SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N); + SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N); + SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo); +}; + +} // end namespace llvm. + +#endif diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp new file mode 100644 index 0000000000000..5562f400b6e1d --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -0,0 +1,561 @@ +//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements generic type expansion and splitting for LegalizeTypes. +// The routines here perform legalization when the details of the type (such as +// whether it is an integer or a float) do not matter. +// Expansion is the act of changing a computation in an illegal type to be a +// computation in two identical registers of a smaller type. The Lo/Hi part +// is required to be stored first in memory on little/big-endian machines. +// Splitting is the act of changing a computation in an illegal type to be a +// computation in two not necessarily identical registers of a smaller type. +// There are no requirements on how the type is represented in memory. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/IR/DataLayout.h" +using namespace llvm; + +#define DEBUG_TYPE "legalize-types" + +//===----------------------------------------------------------------------===// +// Generic Result Expansion. +//===----------------------------------------------------------------------===// + +// These routines assume that the Lo/Hi part is stored first in memory on +// little/big-endian machines, followed by the Hi/Lo part. This means that +// they cannot be used as is on vectors, for which Lo is always stored first. +void DAGTypeLegalizer::ExpandRes_MERGE_VALUES(SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + GetExpandedOp(Op, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + SDLoc dl(N); + + // Handle some special cases efficiently. 
+  switch (getTypeAction(InVT)) {
+  case TargetLowering::TypeLegal:
+  case TargetLowering::TypePromoteInteger:
+    break;
+  case TargetLowering::TypePromoteFloat:
+    llvm_unreachable("Bitcast of a promotion-needing float should never need "
+                     "expansion");
+  case TargetLowering::TypeSoftenFloat:
+    SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
+    Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+    Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+    return;
+  case TargetLowering::TypeExpandInteger:
+  case TargetLowering::TypeExpandFloat: {
+    auto &DL = DAG.getDataLayout();
+    // Convert the expanded pieces of the input.
+    GetExpandedOp(InOp, Lo, Hi);
+    if (TLI.hasBigEndianPartOrdering(InVT, DL) !=
+        TLI.hasBigEndianPartOrdering(OutVT, DL))
+      std::swap(Lo, Hi);
+    Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+    Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+    return;
+  }
+  case TargetLowering::TypeSplitVector:
+    GetSplitVector(InOp, Lo, Hi);
+    if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
+      std::swap(Lo, Hi);
+    Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+    Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+    return;
+  case TargetLowering::TypeScalarizeVector:
+    // Convert the element instead.
+    SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
+    Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+    Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+    return;
+  case TargetLowering::TypeWidenVector: {
+    assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
+    InOp = GetWidenedVector(InOp);
+    EVT LoVT, HiVT;
+    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT);
+    std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT);
+    if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
+      std::swap(Lo, Hi);
+    Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+    Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+    return;
+  }
+  }
+
+  if (InVT.isVector() && OutVT.isInteger()) {
+    // Handle cases like i64 = BITCAST v1i64 on x86, where the operand
+    // is legal but the result is not.
+    unsigned NumElems = 2;
+    EVT ElemVT = NOutVT;
+    EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+
+    // If <ElemVT * N> is not a legal type, try <ElemVT/2 * (N*2)>.
+    while (!isTypeLegal(NVT)) {
+      unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2;
+      // If the element size is smaller than a byte, bail.
+      if (NewSizeInBits < 8)
+        break;
+      NumElems *= 2;
+      ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits);
+      NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+    }
+
+    if (isTypeLegal(NVT)) {
+      SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
+
+      SmallVector<SDValue, 8> Vals;
+      for (unsigned i = 0; i < NumElems; ++i)
+        Vals.push_back(DAG.getNode(
+            ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, CastInOp,
+            DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+
+      // Build Lo, Hi pair by pairing extracted elements if needed.
+      unsigned Slot = 0;
+      for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) {
+        // Each iteration will BUILD_PAIR two nodes and append the result until
+        // there are only two nodes left, i.e. Lo and Hi.
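+        //
+        // Illustrative walk-through (hypothetical types): expanding
+        // i128 = BITCAST v4i32 with NOutVT = i64 extracts four i32 values
+        // {v0, v1, v2, v3}; assuming v2i64 was illegal above, the loop
+        // appends BUILD_PAIR(v0, v1) and BUILD_PAIR(v2, v3) as i64 nodes,
+        // leaving exactly the two values that become Lo and Hi below.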
+        SDValue LHS = Vals[Slot];
+        SDValue RHS = Vals[Slot + 1];
+
+        if (DAG.getDataLayout().isBigEndian())
+          std::swap(LHS, RHS);
+
+        Vals.push_back(DAG.getNode(
+            ISD::BUILD_PAIR, dl,
+            EVT::getIntegerVT(*DAG.getContext(), LHS.getValueSizeInBits() << 1),
+            LHS, RHS));
+      }
+      Lo = Vals[Slot++];
+      Hi = Vals[Slot++];
+
+      if (DAG.getDataLayout().isBigEndian())
+        std::swap(Lo, Hi);
+
+      return;
+    }
+  }
+
+  // Lower the bit-convert to a store/load from the stack.
+  assert(NOutVT.isByteSized() && "Expanded type not byte sized!");
+
+  // Create the stack frame object. Make sure it is aligned for both
+  // the source and expanded destination types.
+  unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(
+      NOutVT.getTypeForEVT(*DAG.getContext()));
+  SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
+  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  MachinePointerInfo PtrInfo =
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+
+  // Emit a store to the stack slot.
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo);
+
+  // Load the first half from the stack slot.
+  Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo);
+
+  // Increment the pointer to the other half.
+  unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
+  StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+                         DAG.getConstant(IncrementSize, dl,
+                                         StackPtr.getValueType()));
+
+  // Load the second half from the stack slot.
+  Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
+                   PtrInfo.getWithOffset(IncrementSize),
+                   MinAlign(Alignment, IncrementSize));
+
+  // Handle endianness of the load.
+  if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
+    std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
+                                            SDValue &Hi) {
+  // Return the operands.
+  Lo = N->getOperand(0);
+  Hi = N->getOperand(1);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
+                                                 SDValue &Hi) {
+  GetExpandedOp(N->getOperand(0), Lo, Hi);
+  SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ?
+                   Hi : Lo;
+
+  assert(Part.getValueType() == N->getValueType(0) &&
+         "Type twice as big as expanded type not itself expanded!");
+
+  GetPairElements(Part, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+                                                    SDValue &Hi) {
+  SDValue OldVec = N->getOperand(0);
+  unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+  EVT OldEltVT = OldVec.getValueType().getVectorElementType();
+  SDLoc dl(N);
+
+  // Convert to a vector of the expanded element type, for example
+  // <3 x i64> -> <6 x i32>.
+  EVT OldVT = N->getValueType(0);
+  EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+
+  if (OldVT != OldEltVT) {
+    // The result of EXTRACT_VECTOR_ELT may be larger than the element type of
+    // the input vector. If so, extend the elements of the input vector to the
+    // same bitwidth as the result before expanding.
+    assert(OldEltVT.bitsLT(OldVT) && "Result type smaller than element type!");
+    EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts);
+    OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0));
+  }
+
+  SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+                               EVT::getVectorVT(*DAG.getContext(),
+                                                NewVT, 2*OldElts),
+                               OldVec);
+
+  // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
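+  //
+  // For example (illustrative only): extracting an i64 element at index Idx
+  // from a v2i64 vector, when i64 must be expanded to two i32 parts, bitcasts
+  // the source to v4i32 and extracts lanes 2*Idx and 2*Idx+1 as Lo and Hi
+  // (swapped on big-endian targets, as below).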
+ SDValue Idx = N->getOperand(1); + + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); + Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); + + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, + DAG.getConstant(1, dl, Idx.getValueType())); + Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); + + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(ISD::isNormalLoad(N) && "This routine only for normal loads!"); + SDLoc dl(N); + + LoadSDNode *LD = cast<LoadSDNode>(N); + EVT ValueVT = LD->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + unsigned Alignment = LD->getAlignment(); + AAMDNodes AAInfo = LD->getAAInfo(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), Alignment, + LD->getMemOperand()->getFlags(), AAInfo); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits() / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Hi = DAG.getLoad(NVT, dl, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + MinAlign(Alignment, IncrementSize), + LD->getMemOperand()->getFlags(), AAInfo); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Handle endianness of the load. + if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) + std::swap(Lo, Hi); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), Chain); +} + +void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); + SDValue Chain = N->getOperand(0); + SDValue Ptr = N->getOperand(1); + SDLoc dl(N); + const unsigned Align = N->getConstantOperandVal(3); + + Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align); + Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); + Chain = Hi.getValue(1); + + // Handle endianness of the load. + if (TLI.hasBigEndianPartOrdering(OVT, DAG.getDataLayout())) + std::swap(Lo, Hi); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), Chain); +} + + +//===--------------------------------------------------------------------===// +// Generic Operand Expansion. 
+//===--------------------------------------------------------------------===// + +void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements, + SmallVectorImpl<SDValue> &Ops, + EVT EltVT) { + assert(Op.getValueType().isInteger()); + SDLoc DL(Op); + SDValue Parts[2]; + + if (NumElements > 1) { + NumElements >>= 1; + SplitInteger(Op, Parts[0], Parts[1]); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Parts[0], Parts[1]); + IntegerToVector(Parts[0], NumElements, Ops, EltVT); + IntegerToVector(Parts[1], NumElements, Ops, EltVT); + } else { + Ops.push_back(DAG.getNode(ISD::BITCAST, DL, EltVT, Op)); + } +} + +SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { + SDLoc dl(N); + if (N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isInteger()) { + // An illegal expanding type is being converted to a legal vector type. + // Make a two element vector out of the expanded parts and convert that + // instead, but only if the new vector type is legal (otherwise there + // is no point, and it might create expansion loops). For example, on + // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32. + // + // FIXME: I'm not sure why we are first trying to split the input into + // a 2 element vector, so I'm leaving it here to maintain the current + // behavior. + unsigned NumElts = 2; + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), + TLI.getTypeToTransformTo(*DAG.getContext(), OVT), + NumElts); + if (!isTypeLegal(NVT)) { + // If we can't find a legal type by splitting the integer in half, + // then we can use the node's value type. + NumElts = N->getValueType(0).getVectorNumElements(); + NVT = N->getValueType(0); + } + + SmallVector<SDValue, 8> Ops; + IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType()); + + SDValue Vec = + DAG.getBuildVector(NVT, dl, makeArrayRef(Ops.data(), NumElts)); + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); + } + + // Otherwise, store to a temporary and load out again as the new type. + return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); +} + +SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { + // The vector type is legal but the element type needs expansion. + EVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + EVT OldVT = N->getOperand(0).getValueType(); + EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); + SDLoc dl(N); + + assert(OldVT == VecVT.getVectorElementType() && + "BUILD_VECTOR operand type doesn't match vector element type!"); + + // Build a vector of twice the length out of the expanded elements. + // For example <3 x i64> -> <6 x i32>. + SmallVector<SDValue, 16> NewElts; + NewElts.reserve(NumElts*2); + + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Lo, Hi; + GetExpandedOp(N->getOperand(i), Lo, Hi); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + NewElts.push_back(Lo); + NewElts.push_back(Hi); + } + + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()); + SDValue NewVec = DAG.getBuildVector(NewVecVT, dl, NewElts); + + // Convert the new vector to the old vector type. + return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); +} + +SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { + SDValue Lo, Hi; + GetExpandedOp(N->getOperand(0), Lo, Hi); + return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? 
Hi : Lo; +} + +SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { + // The vector type is legal but the element type needs expansion. + EVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + SDLoc dl(N); + + SDValue Val = N->getOperand(1); + EVT OldEVT = Val.getValueType(); + EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT); + + assert(OldEVT == VecVT.getVectorElementType() && + "Inserted element type doesn't match vector element type!"); + + // Bitconvert to a vector of twice the length with elements of the expanded + // type, insert the expanded vector elements, and then convert back. + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2); + SDValue NewVec = DAG.getNode(ISD::BITCAST, dl, + NewVecVT, N->getOperand(0)); + + SDValue Lo, Hi; + GetExpandedOp(Val, Lo, Hi); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + SDValue Idx = N->getOperand(2); + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); + Idx = DAG.getNode(ISD::ADD, dl, + Idx.getValueType(), Idx, + DAG.getConstant(1, dl, Idx.getValueType())); + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); + + // Convert the new vector to the old vector type. + return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); +} + +SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { + SDLoc dl(N); + EVT VT = N->getValueType(0); + assert(VT.getVectorElementType() == N->getOperand(0).getValueType() && + "SCALAR_TO_VECTOR operand type doesn't match vector element type!"); + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(NumElts); + Ops[0] = N->getOperand(0); + SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType()); + for (unsigned i = 1; i < NumElts; ++i) + Ops[i] = UndefVal; + return DAG.getBuildVector(VT, dl, Ops); +} + +SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { + assert(ISD::isNormalStore(N) && "This routine only for normal stores!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + SDLoc dl(N); + + StoreSDNode *St = cast<StoreSDNode>(N); + EVT ValueVT = St->getValue().getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); + SDValue Chain = St->getChain(); + SDValue Ptr = St->getBasePtr(); + unsigned Alignment = St->getAlignment(); + AAMDNodes AAInfo = St->getAAInfo(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + unsigned IncrementSize = NVT.getSizeInBits() / 8; + + SDValue Lo, Hi; + GetExpandedOp(St->getValue(), Lo, Hi); + + if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) + std::swap(Lo, Hi); + + Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment, + St->getMemOperand()->getFlags(), AAInfo); + + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); + Hi = DAG.getStore(Chain, dl, Hi, Ptr, + St->getPointerInfo().getWithOffset(IncrementSize), + MinAlign(Alignment, IncrementSize), + St->getMemOperand()->getFlags(), AAInfo); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); +} + + +//===--------------------------------------------------------------------===// +// Generic Result Splitting. 
+//===--------------------------------------------------------------------===//
+
+// Be careful to make no assumptions about which of Lo/Hi is stored first in
+// memory (for vectors it is always Lo first followed by Hi in the following
+// bytes; for integers and floats it is Lo first if and only if the machine is
+// little-endian).
+
+void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+                                             SDValue &Lo, SDValue &Hi) {
+  SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+  GetSplitOp(Op, Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  SDValue LL, LH, RL, RH, CL, CH;
+  SDLoc dl(N);
+  GetSplitOp(N->getOperand(1), LL, LH);
+  GetSplitOp(N->getOperand(2), RL, RH);
+
+  SDValue Cond = N->getOperand(0);
+  CL = CH = Cond;
+  if (Cond.getValueType().isVector()) {
+    if (SDValue Res = WidenVSELECTAndMask(N))
+      std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl);
+    // Check if there are already split versions of the vector available and
+    // use those instead of splitting the mask operand again.
+    else if (getTypeAction(Cond.getValueType()) ==
+             TargetLowering::TypeSplitVector)
+      GetSplitVector(Cond, CL, CH);
+    // It seems to improve code to generate two narrow SETCCs as opposed to
+    // splitting a wide result vector.
+    else if (Cond.getOpcode() == ISD::SETCC) {
+      // If the condition is a vXi1 vector, and the LHS of the setcc is a legal
+      // type and the setcc result type is the same vXi1, then leave the setcc
+      // alone.
+      EVT CondLHSVT = Cond.getOperand(0).getValueType();
+      if (Cond.getValueType().getVectorElementType() == MVT::i1 &&
+          isTypeLegal(CondLHSVT) &&
+          getSetCCResultType(CondLHSVT) == Cond.getValueType())
+        std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
+      else
+        SplitVecRes_SETCC(Cond.getNode(), CL, CH);
+    } else
+      std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
+  }
+
+  Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
+  Hi = DAG.getNode(N->getOpcode(), dl, LH.getValueType(), CH, LH, RH);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
+                                          SDValue &Hi) {
+  SDValue LL, LH, RL, RH;
+  SDLoc dl(N);
+  GetSplitOp(N->getOperand(2), LL, LH);
+  GetSplitOp(N->getOperand(3), RL, RH);
+
+  Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0),
+                   N->getOperand(1), LL, RL, N->getOperand(4));
+  Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0),
+                   N->getOperand(1), LH, RH, N->getOperand(4));
+}
+
+void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  EVT LoVT, HiVT;
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  Lo = DAG.getUNDEF(LoVT);
+  Hi = DAG.getUNDEF(HiVT);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
new file mode 100644
index 0000000000000..15c3a0b6cfadf
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -0,0 +1,1444 @@
+//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeVectors method.
+//
+// The vector legalizer looks for vector operations which might need to be
+// scalarized and legalizes them.
+// This is a separate step from Legalize because scalarizing can introduce
+// illegal types. For example, suppose we have an ISD::SDIV of type v2i64 on
+// x86-32. The type is legal (for example, addition on a v2i64 is legal), but
+// ISD::SDIV isn't legal, so we have to unroll the operation, which introduces
+// nodes with the illegal type i64 which must be expanded. Similarly, suppose
+// we have an ISD::SRA of type v16i8 on PowerPC; the operation must be
+// unrolled, which introduces nodes with the illegal type i8 which must be
+// promoted.
+//
+// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
+// or operations that happen to take a vector which are custom-lowered;
+// the legalization for such operations never produces nodes
+// with illegal types, so it's okay to put off legalizing them until
+// SelectionDAG::Legalize runs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "legalizevectorops"
+
+namespace {
+
+class VectorLegalizer {
+  SelectionDAG& DAG;
+  const TargetLowering &TLI;
+  bool Changed = false; // Keep track of whether anything changed
+
+  /// For nodes that are of legal width, and that have more than one use, this
+  /// map indicates what regularized operand to use. This allows us to avoid
+  /// legalizing the same thing more than once.
+  SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
+
+  /// Adds a node to the translation cache.
+  void AddLegalizedOperand(SDValue From, SDValue To) {
+    LegalizedNodes.insert(std::make_pair(From, To));
+    // If someone requests legalization of the new node, return itself.
+    if (From != To)
+      LegalizedNodes.insert(std::make_pair(To, To));
+  }
+
+  /// Legalizes the given node.
+  SDValue LegalizeOp(SDValue Op);
+
+  /// Assuming the node is legal, "legalize" the results.
+  SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
+
+  /// Implements unrolling a VSETCC.
+  SDValue UnrollVSETCC(SDValue Op);
+
+  /// Implement expand-based legalization of vector operations.
+  ///
+  /// This is just a high-level routine to dispatch to specific code paths for
+  /// operations to legalize them.
+  SDValue Expand(SDValue Op);
+
+  /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
+  /// FP_TO_SINT isn't legal.
+  SDValue ExpandFP_TO_UINT(SDValue Op);
+
+  /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
+  /// SINT_TO_FLOAT and SHR on vectors aren't legal.
+  SDValue ExpandUINT_TO_FLOAT(SDValue Op);
+
+  /// Implement expansion for SIGN_EXTEND_INREG using SHL and SRA.
+  SDValue ExpandSEXTINREG(SDValue Op);
+
+  /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
+  ///
+  /// Shuffles the low lanes of the operand into place and bitcasts to the
+  /// proper type. The contents of the bits in the extended part of each
+  /// element are undef.
+  SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
+
+  /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
+  ///
+  /// Shuffles the low lanes of the operand into place, bitcasts to the proper
+  /// type, then shifts left and arithmetic shifts right to introduce a sign
+  /// extension.
+  SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
+
+  /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
+  ///
+  /// Shuffles the low lanes of the operand into place and blends zeros into
+  /// the remaining lanes, finally bitcasting to the proper type.
+  SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
+
+  /// Implement expand-based legalization of ABS vector operations.
+  /// If the following expansion is legal/custom then do it:
+  /// (ABS x) --> (XOR (ADD x, (SRA x, sizeof(x)-1)), (SRA x, sizeof(x)-1))
+  /// else unroll the operation.
+  SDValue ExpandABS(SDValue Op);
+
+  /// Expand bswap of vectors into a shuffle if legal.
+  SDValue ExpandBSWAP(SDValue Op);
+
+  /// Implement vselect in terms of XOR, AND, OR when blend is not
+  /// supported by the target.
+  SDValue ExpandVSELECT(SDValue Op);
+  SDValue ExpandSELECT(SDValue Op);
+  SDValue ExpandLoad(SDValue Op);
+  SDValue ExpandStore(SDValue Op);
+  SDValue ExpandFNEG(SDValue Op);
+  SDValue ExpandFSUB(SDValue Op);
+  SDValue ExpandBITREVERSE(SDValue Op);
+  SDValue ExpandCTPOP(SDValue Op);
+  SDValue ExpandCTLZ(SDValue Op);
+  SDValue ExpandCTTZ(SDValue Op);
+  SDValue ExpandFunnelShift(SDValue Op);
+  SDValue ExpandROT(SDValue Op);
+  SDValue ExpandFMINNUM_FMAXNUM(SDValue Op);
+  SDValue ExpandUADDSUBO(SDValue Op);
+  SDValue ExpandSADDSUBO(SDValue Op);
+  SDValue ExpandMULO(SDValue Op);
+  SDValue ExpandAddSubSat(SDValue Op);
+  SDValue ExpandFixedPointMul(SDValue Op);
+  SDValue ExpandStrictFPOp(SDValue Op);
+
+  /// Implements vector promotion.
+  ///
+  /// This is essentially just bitcasting the operands to a different type and
+  /// bitcasting the result back to the original type.
+  SDValue Promote(SDValue Op);
+
+  /// Implements [SU]INT_TO_FP vector promotion.
+  ///
+  /// This is a [zs]ext of the input operand to a larger integer type.
+  SDValue PromoteINT_TO_FP(SDValue Op);
+
+  /// Implements FP_TO_[SU]INT vector promotion of the result type.
+  ///
+  /// It is promoted to a larger integer type. The result is then
+  /// truncated back to the original type.
+  SDValue PromoteFP_TO_INT(SDValue Op);
+
+public:
+  VectorLegalizer(SelectionDAG& dag) :
+      DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
+
+  /// Begin legalizing the vector operations in the DAG.
+  bool Run();
+};
+
+} // end anonymous namespace
+
+bool VectorLegalizer::Run() {
+  // Before we start legalizing vector nodes, check if there are any vectors.
+  bool HasVectors = false;
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
+    // Check if the values of the nodes contain vectors. We don't need to check
+    // the operands because we are going to check their values at some point.
+    for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
+         J != E; ++J)
+      HasVectors |= J->isVector();
+
+    // If we found a vector node we can start the legalization.
+    if (HasVectors)
+      break;
+  }
+
+  // If this basic block has no vectors then there is no need to legalize
+  // vectors.
+  if (!HasVectors)
+    return false;
+
+  // The legalize process is inherently a bottom-up recursive process (users
+  // legalize their uses before themselves).
Given infinite stack space, we + // could just start legalizing on the root and traverse the whole graph. In + // practice however, this causes us to run out of stack space on large basic + // blocks. To avoid this problem, compute an ordering of the nodes where each + // node is only legalized after all of its operands are legalized. + DAG.AssignTopologicalOrder(); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) + LegalizeOp(SDValue(&*I, 0)); + + // Finally, it's possible the root changed. Get the new root. + SDValue OldRoot = DAG.getRoot(); + assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); + DAG.setRoot(LegalizedNodes[OldRoot]); + + LegalizedNodes.clear(); + + // Remove dead nodes now. + DAG.RemoveDeadNodes(); + + return Changed; +} + +SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) { + // Generic legalization: just pass the operand through. + for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i) + AddLegalizedOperand(Op.getValue(i), Result.getValue(i)); + return Result.getValue(Op.getResNo()); +} + +SDValue VectorLegalizer::LegalizeOp(SDValue Op) { + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + if (I != LegalizedNodes.end()) return I->second; + + SDNode* Node = Op.getNode(); + + // Legalize the operands + SmallVector<SDValue, 8> Ops; + for (const SDValue &Op : Node->op_values()) + Ops.push_back(LegalizeOp(Op)); + + SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), + Op.getResNo()); + + if (Op.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { + LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: "; + Node->dump(&DAG)); + switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0), + LD->getMemoryVT())) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + return TranslateLegalizeResults(Op, Result); + case TargetLowering::Custom: + if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) { + assert(Lowered->getNumValues() == Op->getNumValues() && + "Unexpected number of results"); + if (Lowered != Result) { + // Make sure the new code is also legal. + Lowered = LegalizeOp(Lowered); + Changed = true; + } + return TranslateLegalizeResults(Op, Lowered); + } + LLVM_FALLTHROUGH; + case TargetLowering::Expand: + Changed = true; + return ExpandLoad(Op); + } + } + } else if (Op.getOpcode() == ISD::STORE) { + StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + EVT StVT = ST->getMemoryVT(); + MVT ValVT = ST->getValue().getSimpleValueType(); + if (StVT.isVector() && ST->isTruncatingStore()) { + LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: "; + Node->dump(&DAG)); + switch (TLI.getTruncStoreAction(ValVT, StVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + return TranslateLegalizeResults(Op, Result); + case TargetLowering::Custom: { + SDValue Lowered = TLI.LowerOperation(Result, DAG); + if (Lowered != Result) { + // Make sure the new code is also legal. 
+ Lowered = LegalizeOp(Lowered); + Changed = true; + } + return TranslateLegalizeResults(Op, Lowered); + } + case TargetLowering::Expand: + Changed = true; + return ExpandStore(Op); + } + } + } + + bool HasVectorValueOrOp = false; + for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J) + HasVectorValueOrOp |= J->isVector(); + for (const SDValue &Op : Node->op_values()) + HasVectorValueOrOp |= Op.getValueType().isVector(); + + if (!HasVectorValueOrOp) + return TranslateLegalizeResults(Op, Result); + + TargetLowering::LegalizeAction Action = TargetLowering::Legal; + switch (Op.getOpcode()) { + default: + return TranslateLegalizeResults(Op, Result); + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FREM: + case ISD::STRICT_FSQRT: + case ISD::STRICT_FMA: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + case ISD::STRICT_FMAXNUM: + case ISD::STRICT_FMINNUM: + case ISD::STRICT_FCEIL: + case ISD::STRICT_FFLOOR: + case ISD::STRICT_FROUND: + case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_EXTEND: + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + // If we're asked to expand a strict vector floating-point operation, + // by default we're going to simply unroll it. That is usually the + // best approach, except in the case where the resulting strict (scalar) + // operations would themselves use the fallback mutation to non-strict. + // In that specific case, just do the fallback on the vector op. 
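+    // Concretely (an illustrative reading of the check below): for, say,
+    // STRICT_FSQRT on v4f32, if unrolling would only yield scalar
+    // STRICT_FSQRT nodes that themselves fall back to non-strict FSQRT,
+    // nothing is gained by unrolling, so the fallback mutation is applied to
+    // the vector operation directly.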
+ if (Action == TargetLowering::Expand && + TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) { + EVT EltVT = Node->getValueType(0).getVectorElementType(); + if (TLI.getOperationAction(Node->getOpcode(), EltVT) + == TargetLowering::Expand && + TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT) + == TargetLowering::Legal) + Action = TargetLowering::Legal; + } + break; + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + case ISD::SDIVREM: + case ISD::UDIVREM: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::FSHL: + case ISD::FSHR: + case ISD::ROTL: + case ISD::ROTR: + case ISD::ABS: + case ISD::BSWAP: + case ISD::BITREVERSE: + case ISD::CTLZ: + case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTPOP: + case ISD::SELECT: + case ISD::VSELECT: + case ISD::SELECT_CC: + case ISD::SETCC: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FNEG: + case ISD::FABS: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINNUM_IEEE: + case ISD::FMAXNUM_IEEE: + case ISD::FMINIMUM: + case ISD::FMAXIMUM: + case ISD::FCOPYSIGN: + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: + case ISD::FPOWI: + case ISD::FPOW: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FCEIL: + case ISD::FTRUNC: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FROUND: + case ISD::FFLOOR: + case ISD::FP_ROUND: + case ISD::FP_EXTEND: + case ISD::FMA: + case ISD::SIGN_EXTEND_INREG: + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + case ISD::FCANONICALIZE: + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::SSUBSAT: + case ISD::USUBSAT: + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + break; + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: { + unsigned Scale = Node->getConstantOperandVal(2); + Action = TLI.getFixedPointOperationAction(Node->getOpcode(), + Node->getValueType(0), Scale); + break; + } + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(0).getValueType()); + break; + } + + LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); + + switch (Action) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Promote: + Result = Promote(Op); + Changed = true; + break; + case TargetLowering::Legal: + LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n"); + break; + case 
TargetLowering::Custom: { + LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); + if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) { + LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); + Result = Tmp1; + break; + } + LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); + LLVM_FALLTHROUGH; + } + case TargetLowering::Expand: + Result = Expand(Op); + } + + // Make sure that the generated code is itself legal. + if (Result != Op) { + Result = LegalizeOp(Result); + Changed = true; + } + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + AddLegalizedOperand(Op, Result); + return Result; +} + +SDValue VectorLegalizer::Promote(SDValue Op) { + // For a few operations there is a specific concept for promotion based on + // the operand's type. + switch (Op.getOpcode()) { + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + // "Promote" the operation by extending the operand. + return PromoteINT_TO_FP(Op); + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: + // Promote the operation by extending the operand. + return PromoteFP_TO_INT(Op); + } + + // There are currently two cases of vector promotion: + // 1) Bitcasting a vector of integers to a different type to a vector of the + // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64. + // 2) Extending a vector of floats to a vector of the same number of larger + // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. + MVT VT = Op.getSimpleValueType(); + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + SDLoc dl(Op); + SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + if (Op.getOperand(j).getValueType().isVector()) + if (Op.getOperand(j) + .getValueType() + .getVectorElementType() + .isFloatingPoint() && + NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()) + Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j)); + else + Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); + else + Operands[j] = Op.getOperand(j); + } + + Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags()); + if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || + (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && + NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) + return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl)); + else + return DAG.getNode(ISD::BITCAST, dl, VT, Op); +} + +SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { + // INT_TO_FP operations may require the input operand be promoted even + // when the type is otherwise legal. + MVT VT = Op.getOperand(0).getSimpleValueType(); + MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && + "Vectors have different number of elements!"); + + SDLoc dl(Op); + SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + + unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? 
ISD::ZERO_EXTEND :
+                                                         ISD::SIGN_EXTEND;
+  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+    if (Op.getOperand(j).getValueType().isVector())
+      Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
+    else
+      Operands[j] = Op.getOperand(j);
+  }
+
+  return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands);
+}
+
+// For FP_TO_INT we promote the result type to a vector type with wider
+// elements and then truncate the result. This is different from the default
+// PromoteVector which uses bitcast to promote, thus assuming that the
+// promoted vector type has the same overall size.
+SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
+  MVT VT = Op.getSimpleValueType();
+  MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+  assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
+         "Vectors have different number of elements!");
+
+  unsigned NewOpc = Op->getOpcode();
+  // Change FP_TO_UINT to FP_TO_SINT if possible.
+  // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
+  if (NewOpc == ISD::FP_TO_UINT &&
+      TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+    NewOpc = ISD::FP_TO_SINT;
+
+  SDLoc dl(Op);
+  SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
+
+  // Assert that the converted value fits in the original type. If it doesn't
+  // (e.g. because the value being converted is too big), then the result of
+  // the original operation was undefined anyway, so the assert is still
+  // correct.
+  Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
+                                                            : ISD::AssertSext,
+                         dl, NVT, Promoted,
+                         DAG.getValueType(VT.getScalarType()));
+  return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
+}
+
+SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
+  LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+
+  EVT SrcVT = LD->getMemoryVT();
+  EVT SrcEltVT = SrcVT.getScalarType();
+  unsigned NumElem = SrcVT.getVectorNumElements();
+
+  SDValue NewChain;
+  SDValue Value;
+  if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
+    SDLoc dl(Op);
+
+    SmallVector<SDValue, 8> Vals;
+    SmallVector<SDValue, 8> LoadChains;
+
+    EVT DstEltVT = LD->getValueType(0).getScalarType();
+    SDValue Chain = LD->getChain();
+    SDValue BasePTR = LD->getBasePtr();
+    ISD::LoadExtType ExtType = LD->getExtensionType();
+
+    // When the elements in a vector are not byte-addressable, we cannot
+    // directly load each element by advancing a pointer, which can only
+    // address bytes. Instead, we load all significant words, mask bits off,
+    // and concatenate them to form each element. Finally, they are extended
+    // to the destination scalar type to build the destination vector.
+    EVT WideVT = TLI.getPointerTy(DAG.getDataLayout());
+
+    assert(WideVT.isRound() &&
+           "Could not handle the sophisticated case when the widest integer"
+           " is not a power of 2.");
+    assert(WideVT.bitsGE(SrcEltVT) &&
+           "Type is not legalized?");
+
+    unsigned WideBytes = WideVT.getStoreSize();
+    unsigned Offset = 0;
+    unsigned RemainingBytes = SrcVT.getStoreSize();
+    SmallVector<SDValue, 8> LoadVals;
+    while (RemainingBytes > 0) {
+      SDValue ScalarLoad;
+      unsigned LoadBytes = WideBytes;
+
+      if (RemainingBytes >= LoadBytes) {
+        ScalarLoad =
+            DAG.getLoad(WideVT, dl, Chain, BasePTR,
+                        LD->getPointerInfo().getWithOffset(Offset),
+                        MinAlign(LD->getAlignment(), Offset),
+                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
+      } else {
+        EVT LoadVT = WideVT;
+        while (RemainingBytes < LoadBytes) {
+          LoadBytes >>= 1; // Reduce the load size by half.
+ LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); + } + ScalarLoad = + DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, + LD->getPointerInfo().getWithOffset(Offset), LoadVT, + MinAlign(LD->getAlignment(), Offset), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + } + + RemainingBytes -= LoadBytes; + Offset += LoadBytes; + + BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes); + + LoadVals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + unsigned BitOffset = 0; + unsigned WideIdx = 0; + unsigned WideBits = WideVT.getSizeInBits(); + + // Extract bits, pack and extend/trunc them into destination type. + unsigned SrcEltBits = SrcEltVT.getSizeInBits(); + SDValue SrcEltBitMask = DAG.getConstant( + APInt::getLowBitsSet(WideBits, SrcEltBits), dl, WideVT); + + for (unsigned Idx = 0; Idx != NumElem; ++Idx) { + assert(BitOffset < WideBits && "Unexpected offset!"); + + SDValue ShAmt = DAG.getConstant( + BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); + SDValue Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); + + BitOffset += SrcEltBits; + if (BitOffset >= WideBits) { + WideIdx++; + BitOffset -= WideBits; + if (BitOffset > 0) { + ShAmt = DAG.getConstant( + SrcEltBits - BitOffset, dl, + TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); + SDValue Hi = + DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); + Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi); + } + } + + Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); + + switch (ExtType) { + default: llvm_unreachable("Unknown extended-load op!"); + case ISD::EXTLOAD: + Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT); + break; + case ISD::ZEXTLOAD: + Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); + break; + case ISD::SEXTLOAD: + ShAmt = + DAG.getConstant(WideBits - SrcEltBits, dl, + TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); + Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); + Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); + Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); + break; + } + Vals.push_back(Lo); + } + + NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); + Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals); + } else { + SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG); + // Skip past MERGE_VALUE node if known. + if (Scalarized->getOpcode() == ISD::MERGE_VALUES) { + NewChain = Scalarized.getOperand(1); + Value = Scalarized.getOperand(0); + } else { + NewChain = Scalarized.getValue(1); + Value = Scalarized.getValue(0); + } + } + + AddLegalizedOperand(Op.getValue(0), Value); + AddLegalizedOperand(Op.getValue(1), NewChain); + + return (Op.getResNo() ? 
NewChain : Value); +} + +SDValue VectorLegalizer::ExpandStore(SDValue Op) { + StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + SDValue TF = TLI.scalarizeVectorStore(ST, DAG); + AddLegalizedOperand(Op, TF); + return TF; +} + +SDValue VectorLegalizer::Expand(SDValue Op) { + switch (Op->getOpcode()) { + case ISD::SIGN_EXTEND_INREG: + return ExpandSEXTINREG(Op); + case ISD::ANY_EXTEND_VECTOR_INREG: + return ExpandANY_EXTEND_VECTOR_INREG(Op); + case ISD::SIGN_EXTEND_VECTOR_INREG: + return ExpandSIGN_EXTEND_VECTOR_INREG(Op); + case ISD::ZERO_EXTEND_VECTOR_INREG: + return ExpandZERO_EXTEND_VECTOR_INREG(Op); + case ISD::BSWAP: + return ExpandBSWAP(Op); + case ISD::VSELECT: + return ExpandVSELECT(Op); + case ISD::SELECT: + return ExpandSELECT(Op); + case ISD::FP_TO_UINT: + return ExpandFP_TO_UINT(Op); + case ISD::UINT_TO_FP: + return ExpandUINT_TO_FLOAT(Op); + case ISD::FNEG: + return ExpandFNEG(Op); + case ISD::FSUB: + return ExpandFSUB(Op); + case ISD::SETCC: + return UnrollVSETCC(Op); + case ISD::ABS: + return ExpandABS(Op); + case ISD::BITREVERSE: + return ExpandBITREVERSE(Op); + case ISD::CTPOP: + return ExpandCTPOP(Op); + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + return ExpandCTLZ(Op); + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + return ExpandCTTZ(Op); + case ISD::FSHL: + case ISD::FSHR: + return ExpandFunnelShift(Op); + case ISD::ROTL: + case ISD::ROTR: + return ExpandROT(Op); + case ISD::FMINNUM: + case ISD::FMAXNUM: + return ExpandFMINNUM_FMAXNUM(Op); + case ISD::UADDO: + case ISD::USUBO: + return ExpandUADDSUBO(Op); + case ISD::SADDO: + case ISD::SSUBO: + return ExpandSADDSUBO(Op); + case ISD::UMULO: + case ISD::SMULO: + return ExpandMULO(Op); + case ISD::USUBSAT: + case ISD::SSUBSAT: + case ISD::UADDSAT: + case ISD::SADDSAT: + return ExpandAddSubSat(Op); + case ISD::SMULFIX: + case ISD::UMULFIX: + return ExpandFixedPointMul(Op); + case ISD::SMULFIXSAT: + case ISD::UMULFIXSAT: + // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly + // why. Maybe it results in worse codegen compared to the unroll for some + // targets? This should probably be investigated. And if we still prefer to + // unroll an explanation could be helpful. 
+    return DAG.UnrollVectorOp(Op.getNode());
+  case ISD::STRICT_FADD:
+  case ISD::STRICT_FSUB:
+  case ISD::STRICT_FMUL:
+  case ISD::STRICT_FDIV:
+  case ISD::STRICT_FREM:
+  case ISD::STRICT_FSQRT:
+  case ISD::STRICT_FMA:
+  case ISD::STRICT_FPOW:
+  case ISD::STRICT_FPOWI:
+  case ISD::STRICT_FSIN:
+  case ISD::STRICT_FCOS:
+  case ISD::STRICT_FEXP:
+  case ISD::STRICT_FEXP2:
+  case ISD::STRICT_FLOG:
+  case ISD::STRICT_FLOG10:
+  case ISD::STRICT_FLOG2:
+  case ISD::STRICT_FRINT:
+  case ISD::STRICT_FNEARBYINT:
+  case ISD::STRICT_FMAXNUM:
+  case ISD::STRICT_FMINNUM:
+  case ISD::STRICT_FCEIL:
+  case ISD::STRICT_FFLOOR:
+  case ISD::STRICT_FROUND:
+  case ISD::STRICT_FTRUNC:
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
+    return ExpandStrictFPOp(Op);
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_MUL:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_UMIN:
+  case ISD::VECREDUCE_FADD:
+  case ISD::VECREDUCE_FMUL:
+  case ISD::VECREDUCE_FMAX:
+  case ISD::VECREDUCE_FMIN:
+    return TLI.expandVecReduce(Op.getNode(), DAG);
+  default:
+    return DAG.UnrollVectorOp(Op.getNode());
+  }
+}
+
+SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
+  // Lower a select instruction where the condition is a scalar and the
+  // operands are vectors. Lower this select to VSELECT and implement it
+  // using XOR, AND and OR. The selector bit is broadcast.
+  EVT VT = Op.getValueType();
+  SDLoc DL(Op);
+
+  SDValue Mask = Op.getOperand(0);
+  SDValue Op1 = Op.getOperand(1);
+  SDValue Op2 = Op.getOperand(2);
+
+  assert(VT.isVector() && !Mask.getValueType().isVector()
+         && Op1.getValueType() == Op2.getValueType() && "Invalid type");
+
+  // If we can't even use the basic vector operations of
+  // AND, OR and XOR, we will have to scalarize the op.
+  // Notice that the operation may be 'promoted', which means that it is
+  // 'bitcast' to another type that we can handle.
+  // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
+  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
+    return DAG.UnrollVectorOp(Op.getNode());
+
+  // Generate a mask operand.
+  EVT MaskTy = VT.changeVectorElementTypeToInteger();
+
+  // The size of each element in the vector mask.
+  EVT BitTy = MaskTy.getScalarType();
+
+  Mask = DAG.getSelect(DL, BitTy, Mask,
+          DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
+                          BitTy),
+          DAG.getConstant(0, DL, BitTy));
+
+  // Broadcast the mask so that the entire vector is all ones or all zeros.
+  Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
+
+  // Bitcast the operands to be the same type as the mask.
+  // This is needed when we select between FP types because
+  // the mask is a vector of integers.
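+  // For example, with v4f32 operands the mask is splat to v4i32, and the
+  // result is computed in v4i32 as (Op1 & Mask) | (Op2 & ~Mask), where each
+  // lane of Mask is either all ones or all zeros.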
+  Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
+  Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
+
+  SDValue AllOnes = DAG.getConstant(
+            APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
+  SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
+
+  Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
+  Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
+  SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
+  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+}
+
+SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
+  EVT VT = Op.getValueType();
+
+  // Make sure that the SRA and SHL instructions are available.
+  if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
+    return DAG.UnrollVectorOp(Op.getNode());
+
+  SDLoc DL(Op);
+  EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
+
+  unsigned BW = VT.getScalarSizeInBits();
+  unsigned OrigBW = OrigTy.getScalarSizeInBits();
+  SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
+
+  Op = Op.getOperand(0);
+  Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
+  return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
+}
+
+// Generically expand a vector anyext in register to a shuffle of the relevant
+// lanes into the appropriate locations, with other lanes left undef.
+SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  int NumElements = VT.getVectorNumElements();
+  SDValue Src = Op.getOperand(0);
+  EVT SrcVT = Src.getValueType();
+  int NumSrcElements = SrcVT.getVectorNumElements();
+
+  // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
+  // into a larger vector type.
+  if (SrcVT.bitsLE(VT)) {
+    assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
+           "ANY_EXTEND_VECTOR_INREG vector size mismatch");
+    NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
+    SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+                             NumSrcElements);
+    Src = DAG.getNode(
+        ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src,
+        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+  }
+
+  // Build a base mask of undef shuffles.
+  SmallVector<int, 16> ShuffleMask;
+  ShuffleMask.resize(NumSrcElements, -1);
+
+  // Place the extended lanes into the correct locations.
+  int ExtLaneScale = NumSrcElements / NumElements;
+  int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
+  for (int i = 0; i < NumElements; ++i)
+    ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
+
+  return DAG.getNode(
+      ISD::BITCAST, DL, VT,
+      DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
+}
+
+SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  SDValue Src = Op.getOperand(0);
+  EVT SrcVT = Src.getValueType();
+
+  // First build an any-extend node which can be legalized above when we
+  // recurse through it.
+  Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);
+
+  // Now we need to sign extend. Do this by shifting the elements. Even if
+  // these aren't legal operations, they have a better chance of being
+  // legalized without full scalarization than the sign extension does.
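+  // E.g. for v4i16 -> v4i32 the shift amount is 16: shifting each i32 lane
+  // left by 16 puts the source's sign bit at bit 31, and the arithmetic
+  // shift right by 16 then replicates it through the upper half.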
+  unsigned EltWidth = VT.getScalarSizeInBits();
+  unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
+  SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
+  return DAG.getNode(ISD::SRA, DL, VT,
+                     DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
+                     ShiftAmount);
+}
+
+// Generically expand a vector zext in register to a shuffle of the relevant
+// lanes into the appropriate locations, a blend of zero into the high bits,
+// and a bitcast to the wider element type.
+SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  int NumElements = VT.getVectorNumElements();
+  SDValue Src = Op.getOperand(0);
+  EVT SrcVT = Src.getValueType();
+  int NumSrcElements = SrcVT.getVectorNumElements();
+
+  // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
+  // into a larger vector type.
+  if (SrcVT.bitsLE(VT)) {
+    assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
+           "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
+    NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
+    SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+                             NumSrcElements);
+    Src = DAG.getNode(
+        ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src,
+        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+  }
+
+  // Build up a zero vector to blend into this one.
+  SDValue Zero = DAG.getConstant(0, DL, SrcVT);
+
+  // Shuffle the incoming lanes into the correct position, and pull all other
+  // lanes from the zero vector.
+  SmallVector<int, 16> ShuffleMask;
+  ShuffleMask.reserve(NumSrcElements);
+  for (int i = 0; i < NumSrcElements; ++i)
+    ShuffleMask.push_back(i);
+
+  int ExtLaneScale = NumSrcElements / NumElements;
+  int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
+  for (int i = 0; i < NumElements; ++i)
+    ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
+
+  return DAG.getNode(ISD::BITCAST, DL, VT,
+                     DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
+}
+
+static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
+  int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
+  for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
+    for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
+      ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
+}
+
+SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
+  EVT VT = Op.getValueType();
+
+  // Generate a byte-wise shuffle mask for the BSWAP.
+  SmallVector<int, 16> ShuffleMask;
+  createBSWAPShuffleMask(VT, ShuffleMask);
+  EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
+
+  // Only emit a shuffle if the mask is legal.
+  if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
+    return DAG.UnrollVectorOp(Op.getNode());
+
+  SDLoc DL(Op);
+  Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
+  Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
+  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+}
+
+SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
+  EVT VT = Op.getValueType();
+
+  // If we have the scalar operation, it's probably cheaper to unroll it.
+  if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
+    return DAG.UnrollVectorOp(Op.getNode());
+
+  // If the vector element width is a whole number of bytes, test if it's legal
+  // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
+  // vector. This greatly reduces the number of bit shifts necessary.
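+  // E.g. a v4i32 BITREVERSE then becomes a v16i8 shuffle reversing the bytes
+  // within each i32 lane, followed by a v16i8 BITREVERSE reversing the bits
+  // within each byte.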
+  unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
+  if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
+    SmallVector<int, 16> BSWAPMask;
+    createBSWAPShuffleMask(VT, BSWAPMask);
+
+    EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
+    if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
+        (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
+         (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
+          TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
+          TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
+          TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
+      SDLoc DL(Op);
+      Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
+      Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
+                                BSWAPMask);
+      Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
+      return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+    }
+  }
+
+  // If we have the appropriate vector bit operations, it is better to use them
+  // than to unroll and expand each component.
+  if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
+      !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
+      !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
+      !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
+    return DAG.UnrollVectorOp(Op.getNode());
+
+  // Let LegalizeDAG handle this later.
+  return Op;
+}
+
+SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
+  // Implement VSELECT in terms of XOR, AND, OR
+  // on platforms which do not support blend natively.
+  SDLoc DL(Op);
+
+  SDValue Mask = Op.getOperand(0);
+  SDValue Op1 = Op.getOperand(1);
+  SDValue Op2 = Op.getOperand(2);
+
+  EVT VT = Mask.getValueType();
+
+  // If we can't even use the basic vector operations of
+  // AND, OR and XOR, we will have to scalarize the op.
+  // Notice that the operation may be 'promoted', which means that it is
+  // 'bitcast' to another type that we can handle.
+  // This operation also isn't safe with AND, OR, XOR when the boolean
+  // type is 0/1, as we need an all-ones vector constant to mask with.
+  // FIXME: Sign extend 1 to all ones if that's legal on the target.
+  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+      TLI.getBooleanContents(Op1.getValueType()) !=
+          TargetLowering::ZeroOrNegativeOneBooleanContent)
+    return DAG.UnrollVectorOp(Op.getNode());
+
+  // If the mask and the type are different sizes, unroll the vector op. This
+  // can occur when getSetCCResultType returns something that is different in
+  // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
+  if (VT.getSizeInBits() != Op1.getValueSizeInBits())
+    return DAG.UnrollVectorOp(Op.getNode());
+
+  // Bitcast the operands to be the same type as the mask.
+  // This is needed when we select between FP types because
+  // the mask is a vector of integers.
+  Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
+  Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
+
+  SDValue AllOnes = DAG.getConstant(
+    APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
+  SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
+
+  Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
+  Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
+  SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
+  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+}
+
+SDValue VectorLegalizer::ExpandABS(SDValue Op) {
+  // Attempt to expand using TargetLowering.
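+  // When the necessary shift, add and xor operations are available, this
+  // typically yields the branchless sequence
+  //   Sign = X >>s (BitWidth - 1); Result = (X + Sign) ^ Sign.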
+  SDValue Result;
+  if (TLI.expandABS(Op.getNode(), Result, DAG))
+    return Result;
+
+  // Otherwise go ahead and unroll.
+  return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) {
+  // Attempt to expand using TargetLowering.
+  SDValue Result, Chain;
+  if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) {
+    if (Op.getNode()->isStrictFPOpcode())
+      // Relink the chain.
+      DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain);
+    return Result;
+  }
+
+  // Otherwise go ahead and unroll.
+  return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
+  EVT VT = Op.getOperand(0).getValueType();
+  SDLoc DL(Op);
+
+  // Attempt to expand using TargetLowering.
+  SDValue Result;
+  if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG))
+    return Result;
+
+  // Make sure that the SINT_TO_FP and SRL instructions are available.
+  if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
+    return DAG.UnrollVectorOp(Op.getNode());
+
+  unsigned BW = VT.getScalarSizeInBits();
+  assert((BW == 64 || BW == 32) &&
+         "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
+
+  SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
+
+  // Constants to clear the upper part of the word.
+  // Notice that we can also use SHL+SHR, but using a constant is slightly
+  // faster on x86.
+  uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
+  SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
+
+  // Two to the power of half-word-size.
+  SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
+
+  // Split the input: HI gets the upper half shifted down, LO gets the lower
+  // half with the upper bits cleared.
+  SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
+  SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
+
+  // Convert HI and LO to floats, scaling fHI back up by 2^(BW/2) so that
+  // fHI + fLO reconstructs the original unsigned value.
+  // TODO: Can any fast-math-flags be set on these nodes?
+  SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
+  fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
+  SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
+
+  // Add the two halves.
+  return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
+}
+
+SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
+  if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
+    SDLoc DL(Op);
+    SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
+    // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
+    return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
+                       Zero, Op.getOperand(0));
+  }
+  return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
+  // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
+  // we can defer this to operation legalization where it will be lowered as
+  // a+(-b).
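+  // E.g. a v4f32 FSUB whose action is Expand is returned unchanged here;
+  // LegalizeDAG later rewrites it as FADD(a, FNEG(b)) rather than
+  // scalarizing it.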
+ EVT VT = Op.getValueType(); + if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && + TLI.isOperationLegalOrCustom(ISD::FADD, VT)) + return Op; // Defer to LegalizeDAG + + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandCTPOP(SDValue Op) { + SDValue Result; + if (TLI.expandCTPOP(Op.getNode(), Result, DAG)) + return Result; + + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) { + SDValue Result; + if (TLI.expandCTLZ(Op.getNode(), Result, DAG)) + return Result; + + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) { + SDValue Result; + if (TLI.expandCTTZ(Op.getNode(), Result, DAG)) + return Result; + + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandFunnelShift(SDValue Op) { + SDValue Result; + if (TLI.expandFunnelShift(Op.getNode(), Result, DAG)) + return Result; + + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandROT(SDValue Op) { + SDValue Result; + if (TLI.expandROT(Op.getNode(), Result, DAG)) + return Result; + + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) { + if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG)) + return Expanded; + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandUADDSUBO(SDValue Op) { + SDValue Result, Overflow; + TLI.expandUADDSUBO(Op.getNode(), Result, Overflow, DAG); + + if (Op.getResNo() == 0) { + AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); + return Result; + } else { + AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); + return Overflow; + } +} + +SDValue VectorLegalizer::ExpandSADDSUBO(SDValue Op) { + SDValue Result, Overflow; + TLI.expandSADDSUBO(Op.getNode(), Result, Overflow, DAG); + + if (Op.getResNo() == 0) { + AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); + return Result; + } else { + AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); + return Overflow; + } +} + +SDValue VectorLegalizer::ExpandMULO(SDValue Op) { + SDValue Result, Overflow; + if (!TLI.expandMULO(Op.getNode(), Result, Overflow, DAG)) + std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Op.getNode()); + + if (Op.getResNo() == 0) { + AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); + return Result; + } else { + AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); + return Overflow; + } +} + +SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) { + if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG)) + return Expanded; + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) { + if (SDValue Expanded = TLI.expandFixedPointMul(Op.getNode(), DAG)) + return Expanded; + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { + EVT VT = Op.getValueType(); + EVT EltVT = VT.getVectorElementType(); + unsigned NumElems = VT.getVectorNumElements(); + unsigned NumOpers = Op.getNumOperands(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT ValueVTs[] = {EltVT, MVT::Other}; + SDValue Chain = Op.getOperand(0); + SDLoc dl(Op); + + SmallVector<SDValue, 32> OpValues; + SmallVector<SDValue, 32> OpChains; + for (unsigned i = 0; i < NumElems; ++i) { + SmallVector<SDValue, 4> Opers; + SDValue Idx = DAG.getConstant(i, dl, + TLI.getVectorIdxTy(DAG.getDataLayout())); + + // The Chain is the first operand. 
+ Opers.push_back(Chain); + + // Now process the remaining operands. + for (unsigned j = 1; j < NumOpers; ++j) { + SDValue Oper = Op.getOperand(j); + EVT OperVT = Oper.getValueType(); + + if (OperVT.isVector()) + Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + OperVT.getVectorElementType(), Oper, Idx); + + Opers.push_back(Oper); + } + + SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers); + + OpValues.push_back(ScalarOp.getValue(0)); + OpChains.push_back(ScalarOp.getValue(1)); + } + + SDValue Result = DAG.getBuildVector(VT, dl, OpValues); + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains); + + AddLegalizedOperand(Op.getValue(0), Result); + AddLegalizedOperand(Op.getValue(1), NewChain); + + return Op.getResNo() ? NewChain : Result; +} + +SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { + EVT VT = Op.getValueType(); + unsigned NumElems = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); + EVT TmpEltVT = LHS.getValueType().getVectorElementType(); + SDLoc dl(Op); + SmallVector<SDValue, 8> Ops(NumElems); + for (unsigned i = 0; i < NumElems; ++i) { + SDValue LHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue RHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Ops[i] = DAG.getNode(ISD::SETCC, dl, + TLI.getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), TmpEltVT), + LHSElem, RHSElem, CC); + Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], + DAG.getConstant(APInt::getAllOnesValue + (EltVT.getSizeInBits()), dl, EltVT), + DAG.getConstant(0, dl, EltVT)); + } + return DAG.getBuildVector(VT, dl, Ops); +} + +bool SelectionDAG::LegalizeVectors() { + return VectorLegalizer(*this).Run(); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp new file mode 100644 index 0000000000000..3763e886cef29 --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -0,0 +1,5122 @@ +//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file performs vector type splitting and scalarization for LegalizeTypes. +// Scalarization is the act of changing a computation in an illegal one-element +// vector type to be a computation in its scalar element type. For example, +// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed +// as a base case when scalarizing vector arithmetic like <4 x f32>, which +// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32 +// types. +// Splitting is the act of changing a computation in an invalid vector type to +// be a computation in two vectors of half the size. For example, implementing +// <128 x f32> operations in terms of two <64 x f32> operations. 
+// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "legalize-types" + +//===----------------------------------------------------------------------===// +// Result Vector Scalarization: <1 x ty> -> ty. +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { + LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; N->dump(&DAG); + dbgs() << "\n"); + SDValue R = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ScalarizeVectorResult #" << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + report_fatal_error("Do not know how to scalarize the result of this " + "operator!\n"); + + case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break; + case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; + case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; + case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break; + case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; + case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; + case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; + case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; + case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break; + case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; + case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; + case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; + case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + R = ScalarizeVecRes_VecInregOp(N); + break; + case ISD::ABS: + case ISD::ANY_EXTEND: + case ISD::BITREVERSE: + case ISD::BSWAP: + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FP_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FRINT: + case ISD::FROUND: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: + case ISD::ZERO_EXTEND: + case ISD::FCANONICALIZE: + R = ScalarizeVecRes_UnaryOp(N); + break; + + case ISD::ADD: + case ISD::AND: + case ISD::FADD: + case ISD::FCOPYSIGN: + case ISD::FDIV: + case ISD::FMUL: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINNUM_IEEE: + case ISD::FMAXNUM_IEEE: + case ISD::FMINIMUM: + case ISD::FMAXIMUM: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: + + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::SSUBSAT: + case ISD::USUBSAT: + + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: + case ISD::MUL: + case ISD::OR: + case ISD::SDIV: + case 
ISD::SREM: + case ISD::SUB: + case ISD::UDIV: + case ISD::UREM: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + R = ScalarizeVecRes_BinOp(N); + break; + case ISD::FMA: + R = ScalarizeVecRes_TernaryOp(N); + break; + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FREM: + case ISD::STRICT_FSQRT: + case ISD::STRICT_FMA: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + case ISD::STRICT_FMAXNUM: + case ISD::STRICT_FMINNUM: + case ISD::STRICT_FCEIL: + case ISD::STRICT_FFLOOR: + case ISD::STRICT_FROUND: + case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_EXTEND: + R = ScalarizeVecRes_StrictFPOp(N); + break; + case ISD::UADDO: + case ISD::SADDO: + case ISD::USUBO: + case ISD::SSUBO: + case ISD::UMULO: + case ISD::SMULO: + R = ScalarizeVecRes_OverflowOp(N, ResNo); + break; + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: + R = ScalarizeVecRes_MULFIX(N); + break; + } + + // If R is null, the sub-method took care of registering the result. + if (R.getNode()) + SetScalarizedVector(SDValue(N, ResNo), R); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS, N->getFlags()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { + SDValue Op0 = GetScalarizedVector(N->getOperand(0)); + SDValue Op1 = GetScalarizedVector(N->getOperand(1)); + SDValue Op2 = GetScalarizedVector(N->getOperand(2)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + Op0.getValueType(), Op0, Op1, Op2); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_MULFIX(SDNode *N) { + SDValue Op0 = GetScalarizedVector(N->getOperand(0)); + SDValue Op1 = GetScalarizedVector(N->getOperand(1)); + SDValue Op2 = N->getOperand(2); + return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1, + Op2); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { + EVT VT = N->getValueType(0).getVectorElementType(); + unsigned NumOpers = N->getNumOperands(); + SDValue Chain = N->getOperand(0); + EVT ValueVTs[] = {VT, MVT::Other}; + SDLoc dl(N); + + SmallVector<SDValue, 4> Opers; + + // The Chain is the first operand. + Opers.push_back(Chain); + + // Now process the remaining operands. + for (unsigned i = 1; i < NumOpers; ++i) { + SDValue Oper = N->getOperand(i); + + if (Oper.getValueType().isVector()) + Oper = GetScalarizedVector(Oper); + + Opers.push_back(Oper); + } + + SDValue Result = DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_OverflowOp(SDNode *N, + unsigned ResNo) { + SDLoc DL(N); + EVT ResVT = N->getValueType(0); + EVT OvVT = N->getValueType(1); + + SDValue ScalarLHS, ScalarRHS; + if (getTypeAction(ResVT) == TargetLowering::TypeScalarizeVector) { + ScalarLHS = GetScalarizedVector(N->getOperand(0)); + ScalarRHS = GetScalarizedVector(N->getOperand(1)); + } else { + SmallVector<SDValue, 1> ElemsLHS, ElemsRHS; + DAG.ExtractVectorElements(N->getOperand(0), ElemsLHS); + DAG.ExtractVectorElements(N->getOperand(1), ElemsRHS); + ScalarLHS = ElemsLHS[0]; + ScalarRHS = ElemsRHS[0]; + } + + SDVTList ScalarVTs = DAG.getVTList( + ResVT.getVectorElementType(), OvVT.getVectorElementType()); + SDNode *ScalarNode = DAG.getNode( + N->getOpcode(), DL, ScalarVTs, ScalarLHS, ScalarRHS).getNode(); + + // Replace the other vector result not being explicitly scalarized here. + unsigned OtherNo = 1 - ResNo; + EVT OtherVT = N->getValueType(OtherNo); + if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) { + SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo)); + } else { + SDValue OtherVal = DAG.getNode( + ISD::SCALAR_TO_VECTOR, DL, OtherVT, SDValue(ScalarNode, OtherNo)); + ReplaceValueWith(SDValue(N, OtherNo), OtherVal); + } + + return SDValue(ScalarNode, ResNo); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, + unsigned ResNo) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + return GetScalarizedVector(Op); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { + SDValue Op = N->getOperand(0); + if (Op.getValueType().isVector() + && Op.getValueType().getVectorNumElements() == 1 + && !isSimpleLegalType(Op.getValueType())) + Op = GetScalarizedVector(Op); + EVT NewVT = N->getValueType(0).getVectorElementType(); + return DAG.getNode(ISD::BITCAST, SDLoc(N), + NewVT, Op); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { + EVT EltVT = N->getValueType(0).getVectorElementType(); + SDValue InOp = N->getOperand(0); + // The BUILD_VECTOR operands may be of wider element types and + // we may need to truncate them back to the requested return type. + if (EltVT.isInteger()) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp); + return InOp; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), + N->getValueType(0).getVectorElementType(), + N->getOperand(0), N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), + NewVT, Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + SDValue Op = GetScalarizedVector(N->getOperand(1)); + SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N), + { NewVT, MVT::Other }, + { N->getOperand(0), Op, N->getOperand(2) }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::FPOWI, SDLoc(N), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { + // The value to insert may have a wider type than the vector element type, + // so be sure to truncate it to the element type if necessary. + SDValue Op = N->getOperand(1); + EVT EltVT = N->getValueType(0).getVectorElementType(); + if (Op.getValueType() != EltVT) + // FIXME: Can this happen for floating point types? + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op); + return Op; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { + assert(N->isUnindexed() && "Indexed vector load?"); + + SDValue Result = DAG.getLoad( + ISD::UNINDEXED, N->getExtensionType(), + N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(), + N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()), + N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), + N->getOriginalAlignment(), N->getMemOperand()->getFlags(), + N->getAAInfo()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { + // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. + EVT DestVT = N->getValueType(0).getVectorElementType(); + SDValue Op = N->getOperand(0); + EVT OpVT = Op.getValueType(); + SDLoc DL(N); + // The result needs scalarizing, but it's not a given that the source does. + // This is a workaround for targets where it's impossible to scalarize the + // result of a conversion, because the source type is legal. + // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32} + // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is + // legal and was not scalarized. 
+  // See the similar logic in ScalarizeVecRes_SETCC
+  if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+    Op = GetScalarizedVector(Op);
+  } else {
+    EVT VT = OpVT.getVectorElementType();
+    Op = DAG.getNode(
+        ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
+        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+  }
+  return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
+  SDValue LHS = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT,
+                     LHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) {
+  SDLoc DL(N);
+  SDValue Op = N->getOperand(0);
+
+  EVT OpVT = Op.getValueType();
+  EVT OpEltVT = OpVT.getVectorElementType();
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+
+  if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+    Op = GetScalarizedVector(Op);
+  } else {
+    Op = DAG.getNode(
+        ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op,
+        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+  }
+
+  switch (N->getOpcode()) {
+  case ISD::ANY_EXTEND_VECTOR_INREG:
+    return DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Op);
+  case ISD::SIGN_EXTEND_VECTOR_INREG:
+    return DAG.getNode(ISD::SIGN_EXTEND, DL, EltVT, Op);
+  case ISD::ZERO_EXTEND_VECTOR_INREG:
+    return DAG.getNode(ISD::ZERO_EXTEND, DL, EltVT, Op);
+  }
+
+  llvm_unreachable("Illegal extend_vector_inreg opcode");
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+  // If the operand is wider than the vector element type then it is implicitly
+  // truncated. Make that explicit here.
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  SDValue InOp = N->getOperand(0);
+  if (InOp.getValueType() != EltVT)
+    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
+  return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
+  SDValue Cond = N->getOperand(0);
+  EVT OpVT = Cond.getValueType();
+  SDLoc DL(N);
+  // The vselect result and true/false operands need scalarizing, but it's
+  // not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
+  // See the similar logic in ScalarizeVecRes_SETCC
+  if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+    Cond = GetScalarizedVector(Cond);
+  } else {
+    EVT VT = OpVT.getVectorElementType();
+    Cond = DAG.getNode(
+        ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
+        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+  }
+
+  SDValue LHS = GetScalarizedVector(N->getOperand(1));
+  TargetLowering::BooleanContent ScalarBool =
+      TLI.getBooleanContents(false, false);
+  TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false);
+
+  // If integer and float booleans have different contents then we can't
+  // reliably optimize in all cases. There is a full explanation for this in
+  // DAGCombiner::visitSELECT() where the same issue affects folding
+  // (select C, 0, 1) to (xor C, 1).
+  if (TLI.getBooleanContents(false, false) !=
+      TLI.getBooleanContents(false, true)) {
+    // At least try the common case where the boolean is generated by a
+    // comparison.
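+    // E.g. if scalar comparisons yield 0/1 while vector comparisons yield
+    // 0/-1, a Cond produced by a SETCC lets us read the actual contents off
+    // the type being compared.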
+ if (Cond->getOpcode() == ISD::SETCC) { + EVT OpVT = Cond->getOperand(0).getValueType(); + ScalarBool = TLI.getBooleanContents(OpVT.getScalarType()); + VecBool = TLI.getBooleanContents(OpVT); + } else + ScalarBool = TargetLowering::UndefinedBooleanContent; + } + + EVT CondVT = Cond.getValueType(); + if (ScalarBool != VecBool) { + switch (ScalarBool) { + case TargetLowering::UndefinedBooleanContent: + break; + case TargetLowering::ZeroOrOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); + // Vector read from all ones, scalar expects a single 1 so mask. + Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT, + Cond, DAG.getConstant(1, SDLoc(N), CondVT)); + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrOneBooleanContent); + // Vector reads from a one, scalar from all ones so sign extend. + Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT, + Cond, DAG.getValueType(MVT::i1)); + break; + } + } + + // Truncate the condition if needed + auto BoolVT = getSetCCResultType(CondVT); + if (BoolVT.bitsLT(CondVT)) + Cond = DAG.getNode(ISD::TRUNCATE, SDLoc(N), BoolVT, Cond); + + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), Cond, LHS, + GetScalarizedVector(N->getOperand(2))); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, + GetScalarizedVector(N->getOperand(2))); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(2)); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(), + N->getOperand(0), N->getOperand(1), + LHS, GetScalarizedVector(N->getOperand(3)), + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { + // Figure out if the scalar is the LHS or RHS and return it. + SDValue Arg = N->getOperand(2).getOperand(0); + if (Arg.isUndef()) + return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); + unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue(); + return GetScalarizedVector(N->getOperand(Op)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + EVT OpVT = LHS.getValueType(); + EVT NVT = N->getValueType(0).getVectorElementType(); + SDLoc DL(N); + + // The result needs scalarizing, but it's not a given that the source does. + if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + LHS = GetScalarizedVector(LHS); + RHS = GetScalarizedVector(RHS); + } else { + EVT VT = OpVT.getVectorElementType(); + LHS = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + RHS = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + + // Turn it into a scalar SETCC. + SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, + N->getOperand(2)); + // Vectors may have a different boolean contents to scalars. 
Promote the + // value appropriately. + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + return DAG.getNode(ExtendCode, DL, NVT, Res); +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Scalarization <1 x ty> -> ty. +//===----------------------------------------------------------------------===// + +bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { + LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + if (!Res.getNode()) { + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + report_fatal_error("Do not know how to scalarize this operator's " + "operand!\n"); + case ISD::BITCAST: + Res = ScalarizeVecOp_BITCAST(N); + break; + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::TRUNCATE: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + Res = ScalarizeVecOp_UnaryOp(N); + break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + Res = ScalarizeVecOp_UnaryOp_StrictFP(N); + break; + case ISD::CONCAT_VECTORS: + Res = ScalarizeVecOp_CONCAT_VECTORS(N); + break; + case ISD::EXTRACT_VECTOR_ELT: + Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); + break; + case ISD::VSELECT: + Res = ScalarizeVecOp_VSELECT(N); + break; + case ISD::SETCC: + Res = ScalarizeVecOp_VSETCC(N); + break; + case ISD::STORE: + Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); + break; + case ISD::STRICT_FP_ROUND: + Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo); + break; + case ISD::FP_ROUND: + Res = ScalarizeVecOp_FP_ROUND(N, OpNo); + break; + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + Res = ScalarizeVecOp_VECREDUCE(N); + break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// If the value to convert is a vector that needs to be scalarized, it must be +/// <1 x ty>. Convert the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::BITCAST, SDLoc(N), + N->getValueType(0), Elt); +} + +/// If the input is a vector that needs to be scalarized, it must be <1 x ty>. +/// Do the operation on the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { + assert(N->getValueType(0).getVectorNumElements() == 1 && + "Unexpected vector type!"); + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), + N->getValueType(0).getScalarType(), Elt); + // Revectorize the result so the types line up with what the uses of this + // expression expect. 
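+  // E.g. a <1 x i32> -> <1 x i8> TRUNCATE is performed as an i32 -> i8
+  // truncate whose result is wrapped back up into a <1 x i8> vector here.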
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op); +} + +/// If the input is a vector that needs to be scalarized, it must be <1 x ty>. +/// Do the strict FP operation on the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) { + assert(N->getValueType(0).getVectorNumElements() == 1 && + "Unexpected vector type!"); + SDValue Elt = GetScalarizedVector(N->getOperand(1)); + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), + { N->getValueType(0).getScalarType(), MVT::Other }, + { N->getOperand(0), Elt }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + // Revectorize the result so the types line up with what the uses of this + // expression expect. + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} + +/// The vectors to concatenate have length one - use a BUILD_VECTOR instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { + SmallVector<SDValue, 8> Ops(N->getNumOperands()); + for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) + Ops[i] = GetScalarizedVector(N->getOperand(i)); + return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops); +} + +/// If the input is a vector that needs to be scalarized, it must be <1 x ty>, +/// so just return the element, ignoring the index. +SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + EVT VT = N->getValueType(0); + SDValue Res = GetScalarizedVector(N->getOperand(0)); + if (Res.getValueType() != VT) + Res = VT.isFloatingPoint() + ? DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Res) + : DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res); + return Res; +} + +/// If the input condition is a vector that needs to be scalarized, it must be +/// <1 x i1>, so just convert to a normal ISD::SELECT +/// (still with vector output type since that was acceptable if we got here). +SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) { + SDValue ScalarCond = GetScalarizedVector(N->getOperand(0)); + EVT VT = N->getValueType(0); + + return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1), + N->getOperand(2)); +} + +/// If the operand is a vector that needs to be scalarized then the +/// result must be v1i1, so just convert to a scalar SETCC and wrap +/// with a scalar_to_vector since the res type is legal if we got here +SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type"); + + EVT VT = N->getValueType(0); + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + + EVT OpVT = N->getOperand(0).getValueType(); + EVT NVT = VT.getVectorElementType(); + SDLoc DL(N); + // Turn it into a scalar SETCC. + SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, + N->getOperand(2)); + + // Vectors may have a different boolean contents to scalars. Promote the + // value appropriately. + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + + Res = DAG.getNode(ExtendCode, DL, NVT, Res); + + return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res); +} + +/// If the value to store is a vector that needs to be scalarized, it must be +/// <1 x ty>. Just store the element. 
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ + assert(N->isUnindexed() && "Indexed store of one-element vector?"); + assert(OpNo == 1 && "Do not know how to scalarize this operand!"); + SDLoc dl(N); + + if (N->isTruncatingStore()) + return DAG.getTruncStore( + N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), + N->getBasePtr(), N->getPointerInfo(), + N->getMemoryVT().getVectorElementType(), N->getAlignment(), + N->getMemOperand()->getFlags(), N->getAAInfo()); + + return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), + N->getBasePtr(), N->getPointerInfo(), + N->getOriginalAlignment(), N->getMemOperand()->getFlags(), + N->getAAInfo()); +} + +/// If the value to round is a vector that needs to be scalarized, it must be +/// <1 x ty>. Convert the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N), + N->getValueType(0).getVectorElementType(), Elt, + N->getOperand(1)); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} + +SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, + unsigned OpNo) { + assert(OpNo == 1 && "Wrong operand for scalarization!"); + SDValue Elt = GetScalarizedVector(N->getOperand(1)); + SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N), + { N->getValueType(0).getVectorElementType(), + MVT::Other }, + { N->getOperand(0), Elt, N->getOperand(2) }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} + +SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) { + SDValue Res = GetScalarizedVector(N->getOperand(0)); + // Result type may be wider than element type. + if (Res.getValueType() != N->getValueType(0)) + Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Res); + return Res; +} + +//===----------------------------------------------------------------------===// +// Result Vector Splitting +//===----------------------------------------------------------------------===// + +/// This method is called when the specified result of the specified node is +/// found to need vector splitting. At this point, the node may also have +/// invalid operands or may have other results that need legalization, we just +/// know that (at least) one result needs vector splitting. +void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { + LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Lo, Hi; + + // See if the target wants to custom expand this node. 
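+  // CustomLowerNode returns true if the target handled the node itself; in
+  // that case its results have already been replaced and no generic
+  // splitting is needed.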
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SplitVectorResult #" << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + report_fatal_error("Do not know how to split the result of this " + "operator!\n"); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; + case ISD::VSELECT: + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break; + case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; + case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; + case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; + case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; + case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; + case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; + case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; + case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::LOAD: + SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); + break; + case ISD::MLOAD: + SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi); + break; + case ISD::MGATHER: + SplitVecRes_MGATHER(cast<MaskedGatherSDNode>(N), Lo, Hi); + break; + case ISD::SETCC: + SplitVecRes_SETCC(N, Lo, Hi); + break; + case ISD::VECTOR_SHUFFLE: + SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); + break; + case ISD::VAARG: + SplitVecRes_VAARG(N, Lo, Hi); + break; + + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + SplitVecRes_ExtVecInRegOp(N, Lo, Hi); + break; + + case ISD::ABS: + case ISD::BITREVERSE: + case ISD::BSWAP: + case ISD::CTLZ: + case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTPOP: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FP_EXTEND: + case ISD::STRICT_FP_EXTEND: + case ISD::FP_ROUND: + case ISD::STRICT_FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::FRINT: + case ISD::FROUND: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: + case ISD::FCANONICALIZE: + SplitVecRes_UnaryOp(N, Lo, Hi); + break; + + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + SplitVecRes_ExtendOp(N, Lo, Hi); + break; + + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINIMUM: + case ISD::FMAXIMUM: + case ISD::SDIV: + case ISD::UDIV: + case ISD::FDIV: + case ISD::FPOW: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::UREM: + case ISD::SREM: + case ISD::FREM: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::SSUBSAT: + case ISD::USUBSAT: + SplitVecRes_BinOp(N, Lo, Hi); + break; + case ISD::FMA: + 
SplitVecRes_TernaryOp(N, Lo, Hi); + break; + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FREM: + case ISD::STRICT_FSQRT: + case ISD::STRICT_FMA: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + case ISD::STRICT_FMAXNUM: + case ISD::STRICT_FMINNUM: + case ISD::STRICT_FCEIL: + case ISD::STRICT_FFLOOR: + case ISD::STRICT_FROUND: + case ISD::STRICT_FTRUNC: + SplitVecRes_StrictFPOp(N, Lo, Hi); + break; + case ISD::UADDO: + case ISD::SADDO: + case ISD::USUBO: + case ISD::SSUBO: + case ISD::UMULO: + case ISD::SMULO: + SplitVecRes_OverflowOp(N, ResNo, Lo, Hi); + break; + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: + SplitVecRes_MULFIX(N, Lo, Hi); + break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. + if (Lo.getNode()) + SetSplitVector(SDValue(N, ResNo), Lo, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDValue RHSLo, RHSHi; + GetSplitVector(N->getOperand(1), RHSLo, RHSHi); + SDLoc dl(N); + + const SDNodeFlags Flags = N->getFlags(); + unsigned Opcode = N->getOpcode(); + Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); + Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); +} + +void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Op0Lo, Op0Hi; + GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi); + SDValue Op1Lo, Op1Hi; + GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi); + SDValue Op2Lo, Op2Hi; + GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); + SDLoc dl(N); + + Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), + Op0Lo, Op1Lo, Op2Lo); + Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), + Op0Hi, Op1Hi, Op2Hi); +} + +void DAGTypeLegalizer::SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDValue RHSLo, RHSHi; + GetSplitVector(N->getOperand(1), RHSLo, RHSHi); + SDLoc dl(N); + SDValue Op2 = N->getOperand(2); + + unsigned Opcode = N->getOpcode(); + Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2); + Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2); +} + +void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // We know the result is a vector. The input may be either a vector or a + // scalar value. + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + SDLoc dl(N); + + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + case TargetLowering::TypeLegal: + case TargetLowering::TypePromoteInteger: + case TargetLowering::TypePromoteFloat: + case TargetLowering::TypeSoftenFloat: + case TargetLowering::TypeScalarizeVector: + case TargetLowering::TypeWidenVector: + break; + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: + // A scalar to vector conversion, where the scalar needs expansion. + // If the vector is being split in two then we can just convert the + // expanded pieces. 
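+    // E.g. on a target where i128 is expanded into two i64 halves, bitcasting
+    // an i128 to v4i32 here just bitcasts each i64 half to a v2i32 piece.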
+ if (LoVT == HiVT) { + GetExpandedOp(InOp, Lo, Hi); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); + return; + } + break; + case TargetLowering::TypeSplitVector: + // If the input is a vector that needs to be split, convert each split + // piece of the input now. + GetSplitVector(InOp, Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); + return; + } + + // In the general case, convert the input to an integer and split it by hand. + EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); + if (DAG.getDataLayout().isBigEndian()) + std::swap(LoIntVT, HiIntVT); + + SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi); + + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + unsigned LoNumElts = LoVT.getVectorNumElements(); + SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); + Lo = DAG.getBuildVector(LoVT, dl, LoOps); + + SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end()); + Hi = DAG.getBuildVector(HiVT, dl, HiOps); +} + +void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS"); + SDLoc dl(N); + unsigned NumSubvectors = N->getNumOperands() / 2; + if (NumSubvectors == 1) { + Lo = N->getOperand(0); + Hi = N->getOperand(1); + return; + } + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); + Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps); + + SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end()); + Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps); +} + +void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + SDLoc dl(N); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, + DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); +} + +void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue SubVec = N->getOperand(1); + SDValue Idx = N->getOperand(2); + SDLoc dl(N); + GetSplitVector(Vec, Lo, Hi); + + EVT VecVT = Vec.getValueType(); + unsigned VecElems = VecVT.getVectorNumElements(); + unsigned SubElems = SubVec.getValueType().getVectorNumElements(); + + // If we know the index is 0, and we know the subvector doesn't cross the + // boundary between the halves, we can avoid spilling the vector, and insert + // into the lower half of the split vector directly. + // TODO: The IdxVal == 0 constraint is artificial, we could do this whenever + // the index is constant and there is no boundary crossing. 
But those cases + // don't seem to get hit in practice. + if (ConstantSDNode *ConstIdx = dyn_cast<ConstantSDNode>(Idx)) { + unsigned IdxVal = ConstIdx->getZExtValue(); + if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) { + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx); + return; + } + } + + // Spill the vector to the stack. + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + SDValue Store = + DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); + + // Store the new subvector into the specified index. + SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); + Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); + unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); + Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo()); + + // Load the Lo part from the stack slot. + Lo = + DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo()); + + // Increment the pointer to the other part. + unsigned IncrementSize = Lo.getValueSizeInBits() / 8; + StackPtr = + DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); + + // Load the Hi part from the stack slot. + Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + MinAlign(Alignment, IncrementSize)); +} + +void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); +} + +void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDLoc DL(N); + + SDValue RHSLo, RHSHi; + SDValue RHS = N->getOperand(1); + EVT RHSVT = RHS.getValueType(); + if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector) + GetSplitVector(RHS, RHSLo, RHSHi); + else + std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS)); + + + Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo); + Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi); +} + +void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDLoc dl(N); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = + DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT()); + + Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, + DAG.getValueType(LoVT)); + Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, + DAG.getValueType(HiVT)); +} + +void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + unsigned Opcode = N->getOpcode(); + SDValue N0 = N->getOperand(0); + + SDLoc dl(N); + SDValue InLo, InHi; + + if (getTypeAction(N0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(N0, InLo, InHi); + else + std::tie(InLo, InHi) = DAG.SplitVectorOperand(N, 0); + + EVT InLoVT = InLo.getValueType(); + unsigned InNumElements = InLoVT.getVectorNumElements(); + + EVT OutLoVT, OutHiVT; + std::tie(OutLoVT, OutHiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + unsigned OutNumElements = OutLoVT.getVectorNumElements(); + assert((2 * OutNumElements) <= InNumElements && + "Illegal extend 
vector in reg split"); + + // *_EXTEND_VECTOR_INREG instructions extend the lowest elements of the + // input vector (i.e. we only use InLo): + // OutLo will extend the first OutNumElements from InLo. + // OutHi will extend the next OutNumElements from InLo. + + // Shuffle the elements from InLo for OutHi into the bottom elements to + // create a 'fake' InHi. + SmallVector<int, 8> SplitHi(InNumElements, -1); + for (unsigned i = 0; i != OutNumElements; ++i) + SplitHi[i] = i + OutNumElements; + InHi = DAG.getVectorShuffle(InLoVT, dl, InLo, DAG.getUNDEF(InLoVT), SplitHi); + + Lo = DAG.getNode(Opcode, dl, OutLoVT, InLo); + Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi); +} + +void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + unsigned NumOps = N->getNumOperands(); + SDValue Chain = N->getOperand(0); + EVT LoVT, HiVT; + SDLoc dl(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + SmallVector<SDValue, 4> OpsLo; + SmallVector<SDValue, 4> OpsHi; + + // The Chain is the first operand. + OpsLo.push_back(Chain); + OpsHi.push_back(Chain); + + // Now process the remaining operands. + for (unsigned i = 1; i < NumOps; ++i) { + SDValue Op = N->getOperand(i); + SDValue OpLo = Op; + SDValue OpHi = Op; + + EVT InVT = Op.getValueType(); + if (InVT.isVector()) { + // If the input also splits, handle it directly for a + // compile time speedup. Otherwise split it by hand. + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) + GetSplitVector(Op, OpLo, OpHi); + else + std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i); + } + + OpsLo.push_back(OpLo); + OpsHi.push_back(OpHi); + } + + EVT LoValueVTs[] = {LoVT, MVT::Other}; + EVT HiValueVTs[] = {HiVT, MVT::Other}; + Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo); + Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi); + + // Build a factor node to remember that this Op is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Lo.getValue(1), Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Chain); +} + +SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) { + SDValue Chain = N->getOperand(0); + EVT VT = N->getValueType(0); + unsigned NE = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + SDLoc dl(N); + + SmallVector<SDValue, 8> Scalars; + SmallVector<SDValue, 4> Operands(N->getNumOperands()); + + // If ResNE is 0, fully unroll the vector op. + if (ResNE == 0) + ResNE = NE; + else if (NE > ResNE) + NE = ResNE; + + //The results of each unrolled operation, including the chain. + EVT ChainVTs[] = {EltVT, MVT::Other}; + SmallVector<SDValue, 8> Chains; + + unsigned i; + for (i = 0; i != NE; ++i) { + Operands[0] = Chain; + for (unsigned j = 1, e = N->getNumOperands(); j != e; ++j) { + SDValue Operand = N->getOperand(j); + EVT OperandVT = Operand.getValueType(); + if (OperandVT.isVector()) { + EVT OperandEltVT = OperandVT.getVectorElementType(); + Operands[j] = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, + DAG.getConstant(i, dl, TLI.getVectorIdxTy( + DAG.getDataLayout()))); + } else { + Operands[j] = Operand; + } + } + SDValue Scalar = DAG.getNode(N->getOpcode(), dl, ChainVTs, Operands); + Scalar.getNode()->setFlags(N->getFlags()); + + //Add in the scalar as well as its chain value to the + //result vectors. 
+ Scalars.push_back(Scalar); + Chains.push_back(Scalar.getValue(1)); + } + + for (; i < ResNE; ++i) + Scalars.push_back(DAG.getUNDEF(EltVT)); + + // Build a new factor node to connect the chain back together. + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + ReplaceValueWith(SDValue(N, 1), Chain); + + // Create a new BUILD_VECTOR node + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, ResNE); + return DAG.getBuildVector(VecVT, dl, Scalars); +} + +void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + EVT ResVT = N->getValueType(0); + EVT OvVT = N->getValueType(1); + EVT LoResVT, HiResVT, LoOvVT, HiOvVT; + std::tie(LoResVT, HiResVT) = DAG.GetSplitDestVTs(ResVT); + std::tie(LoOvVT, HiOvVT) = DAG.GetSplitDestVTs(OvVT); + + SDValue LoLHS, HiLHS, LoRHS, HiRHS; + if (getTypeAction(ResVT) == TargetLowering::TypeSplitVector) { + GetSplitVector(N->getOperand(0), LoLHS, HiLHS); + GetSplitVector(N->getOperand(1), LoRHS, HiRHS); + } else { + std::tie(LoLHS, HiLHS) = DAG.SplitVectorOperand(N, 0); + std::tie(LoRHS, HiRHS) = DAG.SplitVectorOperand(N, 1); + } + + unsigned Opcode = N->getOpcode(); + SDVTList LoVTs = DAG.getVTList(LoResVT, LoOvVT); + SDVTList HiVTs = DAG.getVTList(HiResVT, HiOvVT); + SDNode *LoNode = DAG.getNode(Opcode, dl, LoVTs, LoLHS, LoRHS).getNode(); + SDNode *HiNode = DAG.getNode(Opcode, dl, HiVTs, HiLHS, HiRHS).getNode(); + + Lo = SDValue(LoNode, ResNo); + Hi = SDValue(HiNode, ResNo); + + // Replace the other vector result not being explicitly split here. + unsigned OtherNo = 1 - ResNo; + EVT OtherVT = N->getValueType(OtherNo); + if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) { + SetSplitVector(SDValue(N, OtherNo), + SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo)); + } else { + SDValue OtherVal = DAG.getNode( + ISD::CONCAT_VECTORS, dl, OtherVT, + SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo)); + ReplaceValueWith(SDValue(N, OtherNo), OtherVal); + } +} + +void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Elt = N->getOperand(1); + SDValue Idx = N->getOperand(2); + SDLoc dl(N); + GetSplitVector(Vec, Lo, Hi); + + if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) { + unsigned IdxVal = CIdx->getZExtValue(); + unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); + if (IdxVal < LoNumElts) + Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, + Lo.getValueType(), Lo, Elt, Idx); + else + Hi = + DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, + DAG.getConstant(IdxVal - LoNumElts, dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + return; + } + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(0), true)) + return; + + // Make the vector elements byte-addressable if they aren't already. + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + if (VecVT.getScalarSizeInBits() < 8) { + EltVT = MVT::i8; + VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + VecVT.getVectorNumElements()); + Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec); + // Extend the element type to match if needed. + if (EltVT.bitsGT(Elt.getValueType())) + Elt = DAG.getNode(ISD::ANY_EXTEND, dl, EltVT, Elt); + } + + // Spill the vector to the stack. 
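+  // A non-constant index may land in either half, so fall back to going
+  // through memory: store the vector, overwrite the one element in the
+  // stack slot, then reload the two halves.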
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); + + // Store the new element. This may be larger than the vector element type, + // so use a truncating store. + SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); + Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); + unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, + MachinePointerInfo::getUnknownStack(MF), EltVT); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT); + + // Load the Lo part from the stack slot. + Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo); + + // Increment the pointer to the other part. + unsigned IncrementSize = LoVT.getSizeInBits() / 8; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getConstant(IncrementSize, dl, + StackPtr.getValueType())); + + // Load the Hi part from the stack slot. + Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, + PtrInfo.getWithOffset(IncrementSize), + MinAlign(Alignment, IncrementSize)); + + // If we adjusted the original type, we need to truncate the results. + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + if (LoVT != Lo.getValueType()) + Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Lo); + if (HiVT != Hi.getValueType()) + Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); + Hi = DAG.getUNDEF(HiVT); +} + +void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, + SDValue &Hi) { + assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); + EVT LoVT, HiVT; + SDLoc dl(LD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); + EVT MemoryVT = LD->getMemoryVT(); + unsigned Alignment = LD->getOriginalAlignment(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, + LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); + Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, + LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, + Alignment, MMOFlags, AAInfo); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
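+  // (The TokenFactor keeps every old chain user dependent on both half
+  // loads while leaving the two loads independent of each other.)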
+ ReplaceValueWith(SDValue(LD, 1), Ch); +} + +void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, + SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(MLD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); + + SDValue Ch = MLD->getChain(); + SDValue Ptr = MLD->getBasePtr(); + SDValue Mask = MLD->getMask(); + SDValue PassThru = MLD->getPassThru(); + unsigned Alignment = MLD->getOriginalAlignment(); + ISD::LoadExtType ExtType = MLD->getExtensionType(); + + // Split Mask operand + SDValue MaskLo, MaskHi; + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + } + + EVT MemoryVT = MLD->getMemoryVT(); + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue PassThruLo, PassThruHi; + if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(PassThru, PassThruLo, PassThruHi); + else + std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MLD->getAAInfo(), MLD->getRanges()); + + Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, PassThruLo, LoMemVT, MMO, + ExtType, MLD->isExpandingLoad()); + + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, + MLD->isExpandingLoad()); + unsigned HiOffset = LoMemVT.getStoreSize(); + + MMO = DAG.getMachineFunction().getMachineMemOperand( + MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad, + HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), + MLD->getRanges()); + + Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO, + ExtType, MLD->isExpandingLoad()); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(MLD, 1), Ch); + +} + +void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, + SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(MGT); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0)); + + SDValue Ch = MGT->getChain(); + SDValue Ptr = MGT->getBasePtr(); + SDValue Mask = MGT->getMask(); + SDValue PassThru = MGT->getPassThru(); + SDValue Index = MGT->getIndex(); + SDValue Scale = MGT->getScale(); + unsigned Alignment = MGT->getOriginalAlignment(); + + // Split Mask operand + SDValue MaskLo, MaskHi; + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + } + + EVT MemoryVT = MGT->getMemoryVT(); + EVT LoMemVT, HiMemVT; + // Split MemoryVT + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue PassThruLo, PassThruHi; + if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(PassThru, PassThruLo, PassThruHi); + else + std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); + + SDValue IndexHi, IndexLo; + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); + else + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), MGT->getRanges()); + + SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; + Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, + MMO, MGT->getIndexType()); + + SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; + Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, + MMO, MGT->getIndexType()); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(MGT, 1), Ch); +} + + +void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + + EVT LoVT, HiVT; + SDLoc DL(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + // If the input also splits, handle it directly. Otherwise split it by hand. + SDValue LL, LH, RL, RH; + if (getTypeAction(N->getOperand(0).getValueType()) == + TargetLowering::TypeSplitVector) + GetSplitVector(N->getOperand(0), LL, LH); + else + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + + if (getTypeAction(N->getOperand(1).getValueType()) == + TargetLowering::TypeSplitVector) + GetSplitVector(N->getOperand(1), RL, RH); + else + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); +} + +void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Get the dest types - they may not match the input types, e.g. int_to_fp. 
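+  // E.g. for v4i32 -> v4f64 the result splits into v2f64 halves while the
+  // input is split into v2i32 halves below.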
+ EVT LoVT, HiVT; + SDLoc dl(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + // If the input also splits, handle it directly for a compile time speedup. + // Otherwise split it by hand. + unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; + EVT InVT = N->getOperand(OpNo).getValueType(); + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) + GetSplitVector(N->getOperand(OpNo), Lo, Hi); + else + std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo); + + if (N->getOpcode() == ISD::FP_ROUND) { + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); + } else if (N->getOpcode() == ISD::STRICT_FP_ROUND) { + Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other }, + { N->getOperand(0), Lo, N->getOperand(2) }); + Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other }, + { N->getOperand(0), Hi, N->getOperand(2) }); + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Lo.getValue(1), Hi.getValue(1)); + ReplaceValueWith(SDValue(N, 1), NewChain); + } else if (N->isStrictFPOpcode()) { + Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other }, + { N->getOperand(0), Lo }); + Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other }, + { N->getOperand(0), Hi }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Lo.getValue(1), Hi.getValue(1)); + ReplaceValueWith(SDValue(N, 1), NewChain); + } else { + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + } +} + +void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + EVT SrcVT = N->getOperand(0).getValueType(); + EVT DestVT = N->getValueType(0); + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); + + // We can do better than a generic split operation if the extend is doing + // more than just doubling the width of the elements and the following are + // true: + // - The number of vector elements is even, + // - the source type is legal, + // - the type of a split source is illegal, + // - the type of an extended (by doubling element size) source is legal, and + // - the type of that extended source when split is legal. + // + // This won't necessarily completely legalize the operation, but it will + // more effectively move in the right direction and prevent falling down + // to scalarization in many cases due to the input vector being split too + // far. + unsigned NumElements = SrcVT.getVectorNumElements(); + if ((NumElements & 1) == 0 && + SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) { + LLVMContext &Ctx = *DAG.getContext(); + EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx); + EVT SplitSrcVT = SrcVT.getHalfNumVectorElementsVT(Ctx); + + EVT SplitLoVT, SplitHiVT; + std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); + if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && + TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { + LLVM_DEBUG(dbgs() << "Split vector extend via incremental extend:"; + N->dump(&DAG); dbgs() << "\n"); + // Extend the source vector by one step. + SDValue NewSrc = + DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); + // Get the low and high halves of the new, extended one step, vector. + std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); + // Extend those vector halves the rest of the way. 
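+      // E.g. v16i8 -> v16i32 on a target where v16i16 is legal but v8i8 is
+      // not: extend to v16i16 in one step, split that into two v8i16
+      // halves, then extend each half to v8i32 here.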
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + return; + } + } + // Fall back to the generic unary operator splitting otherwise. + SplitVecRes_UnaryOp(N, Lo, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, + SDValue &Lo, SDValue &Hi) { + // The low and high parts of the original input give four input vectors. + SDValue Inputs[4]; + SDLoc dl(N); + GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); + GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); + EVT NewVT = Inputs[0].getValueType(); + unsigned NewElts = NewVT.getVectorNumElements(); + + // If Lo or Hi uses elements from at most two of the four input vectors, then + // express it as a vector shuffle of those two inputs. Otherwise extract the + // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. + SmallVector<int, 16> Ops; + for (unsigned High = 0; High < 2; ++High) { + SDValue &Output = High ? Hi : Lo; + + // Build a shuffle mask for the output, discovering on the fly which + // input vectors to use as shuffle operands (recorded in InputUsed). + // If building a suitable shuffle vector proves too hard, then bail + // out with useBuildVector set. + unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered. + unsigned FirstMaskIdx = High * NewElts; + bool useBuildVector = false; + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element does not index into any input vector. + Ops.push_back(-1); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Find or create a shuffle vector operand to hold this input. + unsigned OpNo; + for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { + if (InputUsed[OpNo] == Input) { + // This input vector is already an operand. + break; + } else if (InputUsed[OpNo] == -1U) { + // Create a new operand for this input vector. + InputUsed[OpNo] = Input; + break; + } + } + + if (OpNo >= array_lengthof(InputUsed)) { + // More than two input vectors used! Give up on trying to create a + // shuffle vector. Insert all elements into a BUILD_VECTOR instead. + useBuildVector = true; + break; + } + + // Add the mask index for the new shuffle vector. + Ops.push_back(Idx + OpNo * NewElts); + } + + if (useBuildVector) { + EVT EltVT = NewVT.getVectorElementType(); + SmallVector<SDValue, 16> SVOps; + + // Extract the input elements by hand. + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element is "undef" or indexes off the end of the input. + SVOps.push_back(DAG.getUNDEF(EltVT)); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Extract the vector element by hand. + SVOps.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Inputs[Input], + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + } + + // Construct the Lo/Hi output using a BUILD_VECTOR. 
+ Output = DAG.getBuildVector(NewVT, dl, SVOps); + } else if (InputUsed[0] == -1U) { + // No input vectors were used! The result is undefined. + Output = DAG.getUNDEF(NewVT); + } else { + SDValue Op0 = Inputs[InputUsed[0]]; + // If only one input was used, use an undefined vector for the other. + SDValue Op1 = InputUsed[1] == -1U ? + DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]]; + // At least one input vector was used. Create a new shuffle vector. + Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, Ops); + } + + Ops.clear(); + } +} + +void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT OVT = N->getValueType(0); + EVT NVT = OVT.getHalfNumVectorElementsVT(*DAG.getContext()); + SDValue Chain = N->getOperand(0); + SDValue Ptr = N->getOperand(1); + SDValue SV = N->getOperand(2); + SDLoc dl(N); + + const unsigned Alignment = DAG.getDataLayout().getABITypeAlignment( + NVT.getTypeForEVT(*DAG.getContext())); + + Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment); + Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment); + Chain = Hi.getValue(1); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), Chain); +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Splitting +//===----------------------------------------------------------------------===// + +/// This method is called when the specified operand of the specified node is +/// found to need vector splitting. At this point, all of the result types of +/// the node are known to be legal, but other operands of the node may need +/// legalization as well as the specified one. +bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { + LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Res = SDValue(); + + // See if the target wants to custom split this node. 
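+  // (Passing 'false' asks the target to legalize an operand, in contrast to
+  // the 'true' used when splitting results.)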
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + if (!Res.getNode()) { + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SplitVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + report_fatal_error("Do not know how to split this operator's " + "operand!\n"); + + case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; + case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; + case ISD::TRUNCATE: + Res = SplitVecOp_TruncateHelper(N); + break; + case ISD::STRICT_FP_ROUND: + case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; + case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break; + case ISD::STORE: + Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); + break; + case ISD::MSTORE: + Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo); + break; + case ISD::MSCATTER: + Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo); + break; + case ISD::MGATHER: + Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo); + break; + case ISD::VSELECT: + Res = SplitVecOp_VSELECT(N, OpNo); + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType())) + Res = SplitVecOp_TruncateHelper(N); + else + Res = SplitVecOp_UnaryOp(N); + break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::STRICT_FP_EXTEND: + case ISD::FP_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + case ISD::FTRUNC: + case ISD::FCANONICALIZE: + Res = SplitVecOp_UnaryOp(N); + break; + + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + Res = SplitVecOp_ExtVecInRegOp(N); + break; + + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + Res = SplitVecOp_VECREDUCE(N, OpNo); + break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + if (N->isStrictFPOpcode()) + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 && + "Invalid operand expansion"); + else + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { + // The only possibility for an illegal operand is the mask, since result type + // legalization would have handled this node already otherwise. 
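+  // (The data operands have the same type as the result; if they needed
+  // splitting, the result would too, and SplitVectorResult would already
+  // have handled this node.)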
+ assert(OpNo == 0 && "Illegal operand must be mask"); + + SDValue Mask = N->getOperand(0); + SDValue Src0 = N->getOperand(1); + SDValue Src1 = N->getOperand(2); + EVT Src0VT = Src0.getValueType(); + SDLoc DL(N); + assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?"); + + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + assert(Lo.getValueType() == Hi.getValueType() && + "Lo and Hi have differing types"); + + EVT LoOpVT, HiOpVT; + std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); + assert(LoOpVT == HiOpVT && "Asymmetric vector split?"); + + SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask; + std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); + std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); + std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); + + SDValue LoSelect = + DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1); + SDValue HiSelect = + DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect); +} + +SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) { + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + SDLoc dl(N); + + SDValue VecOp = N->getOperand(OpNo); + EVT VecVT = VecOp.getValueType(); + assert(VecVT.isVector() && "Can only split reduce vector operand"); + GetSplitVector(VecOp, Lo, Hi); + EVT LoOpVT, HiOpVT; + std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT); + + bool NoNaN = N->getFlags().hasNoNaNs(); + unsigned CombineOpc = 0; + switch (N->getOpcode()) { + case ISD::VECREDUCE_FADD: CombineOpc = ISD::FADD; break; + case ISD::VECREDUCE_FMUL: CombineOpc = ISD::FMUL; break; + case ISD::VECREDUCE_ADD: CombineOpc = ISD::ADD; break; + case ISD::VECREDUCE_MUL: CombineOpc = ISD::MUL; break; + case ISD::VECREDUCE_AND: CombineOpc = ISD::AND; break; + case ISD::VECREDUCE_OR: CombineOpc = ISD::OR; break; + case ISD::VECREDUCE_XOR: CombineOpc = ISD::XOR; break; + case ISD::VECREDUCE_SMAX: CombineOpc = ISD::SMAX; break; + case ISD::VECREDUCE_SMIN: CombineOpc = ISD::SMIN; break; + case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break; + case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break; + case ISD::VECREDUCE_FMAX: + CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM; + break; + case ISD::VECREDUCE_FMIN: + CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM; + break; + default: + llvm_unreachable("Unexpected reduce ISD node"); + } + + // Use the appropriate scalar instruction on the split subvectors before + // reducing the now partially reduced smaller vector. + SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi, N->getFlags()); + return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, N->getFlags()); +} + +SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { + // The result has a legal vector type, but the input needs splitting. + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + SDLoc dl(N); + GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi); + EVT InVT = Lo.getValueType(); + + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), + InVT.getVectorNumElements()); + + if (N->isStrictFPOpcode()) { + Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other }, + { N->getOperand(0), Lo }); + Hi = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other }, + { N->getOperand(0), Hi }); + + // Build a factor node to remember that this operation is independent + // of the other one. 
+ SDValue Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Ch); + } else { + Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); + } + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { + // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will + // end up being split all the way down to individual components. Convert the + // split pieces into integers and reassemble. + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), + JoinIntegers(Lo, Hi)); +} + +SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { + // We know that the extracted result type is legal. + EVT SubVT = N->getValueType(0); + SDValue Idx = N->getOperand(1); + SDLoc dl(N); + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + + if (IdxVal < LoElts) { + assert(IdxVal + SubVT.getVectorNumElements() <= LoElts && + "Extracted subvector crosses vector split!"); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); + } else { + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, + DAG.getConstant(IdxVal - LoElts, dl, + Idx.getValueType())); + } +} + +SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + EVT VecVT = Vec.getValueType(); + + if (isa<ConstantSDNode>(Idx)) { + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + + SDValue Lo, Hi; + GetSplitVector(Vec, Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + + if (IdxVal < LoElts) + return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0); + return SDValue(DAG.UpdateNodeOperands(N, Hi, + DAG.getConstant(IdxVal - LoElts, SDLoc(N), + Idx.getValueType())), 0); + } + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(0), true)) + return SDValue(); + + // Make the vector elements byte-addressable if they aren't already. + SDLoc dl(N); + EVT EltVT = VecVT.getVectorElementType(); + if (VecVT.getScalarSizeInBits() < 8) { + EltVT = MVT::i8; + VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + VecVT.getVectorNumElements()); + Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec); + } + + // Store the vector to the stack. + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); + + // Load back the required element. + StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); + + // FIXME: This is to handle i1 vectors with elements promoted to i8. + // i1 vector handling needs general improvement. 
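+  // An extending load can't produce a value narrower than its memory type,
+  // so when the requested type is narrower than the promoted element, load
+  // the whole element and then zero-extend or truncate it to fit.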
+ if (N->getValueType(0).bitsLT(EltVT)) { + SDValue Load = DAG.getLoad(EltVT, dl, Store, StackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); + return DAG.getZExtOrTrunc(Load, dl, N->getValueType(0)); + } + + return DAG.getExtLoad( + ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT); +} + +SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) { + SDValue Lo, Hi; + + // *_EXTEND_VECTOR_INREG only reference the lower half of the input, so + // splitting the result has the same effect as splitting the input operand. + SplitVecRes_ExtVecInRegOp(N, Lo, Hi); + + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, + unsigned OpNo) { + EVT LoVT, HiVT; + SDLoc dl(MGT); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0)); + + SDValue Ch = MGT->getChain(); + SDValue Ptr = MGT->getBasePtr(); + SDValue Index = MGT->getIndex(); + SDValue Scale = MGT->getScale(); + SDValue Mask = MGT->getMask(); + SDValue PassThru = MGT->getPassThru(); + unsigned Alignment = MGT->getOriginalAlignment(); + + SDValue MaskLo, MaskHi; + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + // Split Mask operand + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + + EVT MemoryVT = MGT->getMemoryVT(); + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue PassThruLo, PassThruHi; + if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(PassThru, PassThruLo, PassThruHi); + else + std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); + + SDValue IndexHi, IndexLo; + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); + else + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), MGT->getRanges()); + + SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; + SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, + OpsLo, MMO, MGT->getIndexType()); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), + MGT->getRanges()); + + SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; + SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, + OpsHi, MMO, MGT->getIndexType()); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(MGT, 1), Ch); + + SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MGT->getValueType(0), Lo, + Hi); + ReplaceValueWith(SDValue(MGT, 0), Res); + return SDValue(); +} + +SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, + unsigned OpNo) { + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + SDValue Mask = N->getMask(); + SDValue Data = N->getValue(); + EVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getOriginalAlignment(); + SDLoc DL(N); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) + // Split Data operand + GetSplitVector(Data, DataLo, DataHi); + else + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + + // Split Mask operand + SDValue MaskLo, MaskHi; + if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + } + + SDValue Lo, Hi; + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); + + Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, + N->isTruncatingStore(), + N->isCompressingStore()); + + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, + N->isCompressingStore()); + unsigned HiOffset = LoMemVT.getStoreSize(); + + MMO = DAG.getMachineFunction().getMachineMemOperand( + N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore, + HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), + N->getRanges()); + + Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, + N->isTruncatingStore(), N->isCompressingStore()); + + // Build a factor node to remember that this store is independent of the + // other one. + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, + unsigned OpNo) { + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + SDValue Mask = N->getMask(); + SDValue Index = N->getIndex(); + SDValue Scale = N->getScale(); + SDValue Data = N->getValue(); + EVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getOriginalAlignment(); + SDLoc DL(N); + + // Split all operands + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) + // Split Data operand + GetSplitVector(Data, DataLo, DataHi); + else + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + + // Split Mask operand + SDValue MaskLo, MaskHi; + if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + } + + SDValue IndexHi, IndexLo; + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); + else + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); + + SDValue Lo; + MachineMemOperand *MMO = DAG.getMachineFunction(). 
+ getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); + + SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale}; + Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), + DL, OpsLo, MMO, N->getIndexType()); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); + + // The order of the Scatter operation after split is well defined. The "Hi" + // part comes after the "Lo". So these two operations should be chained one + // after another. + SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale}; + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), + DL, OpsHi, MMO, N->getIndexType()); +} + +SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { + assert(N->isUnindexed() && "Indexed store of vector?"); + assert(OpNo == 1 && "Can only split the stored value"); + SDLoc DL(N); + + bool isTruncating = N->isTruncatingStore(); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + EVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getOriginalAlignment(); + MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); + AAMDNodes AAInfo = N->getAAInfo(); + SDValue Lo, Hi; + GetSplitVector(N->getOperand(1), Lo, Hi); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + // Scalarize if the split halves are not byte-sized. + if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) + return TLI.scalarizeVectorStore(N, DAG); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + + if (isTruncating) + Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT, + Alignment, MMOFlags, AAInfo); + else + Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags, + AAInfo); + + // Increment the pointer to the other half. + Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize); + + if (isTruncating) + Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + HiMemVT, Alignment, MMOFlags, AAInfo); + else + Hi = DAG.getStore(Ch, DL, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + Alignment, MMOFlags, AAInfo); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { + SDLoc DL(N); + + // The input operands all must have the same type, and we know the result + // type is valid. Convert this to a buildvector which extracts all the + // input elements. + // TODO: If the input elements are power-two vectors, we could convert this to + // a new CONCAT_VECTORS node with elements that are half-wide. + SmallVector<SDValue, 32> Elts; + EVT EltVT = N->getValueType(0).getVectorElementType(); + for (const SDValue &Op : N->op_values()) { + for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); + i != e; ++i) { + Elts.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())))); + } + } + + return DAG.getBuildVector(N->getValueType(0), DL, Elts); +} + +SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { + // The result type is legal, but the input type is illegal. If splitting + // ends up with the result type of each half still being legal, just + // do that. 
If, however, that would result in an illegal result type,
+  // we can try to get more clever with power-of-two vectors. Specifically,
+  // split the input type, but also widen the result element size, then
+  // concatenate the halves and truncate again. For example, consider a target
+  // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit
+  // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do:
+  //   %inlo = v4i32 extract_subvector %in, 0
+  //   %inhi = v4i32 extract_subvector %in, 4
+  //   %lo16 = v4i16 trunc v4i32 %inlo
+  //   %hi16 = v4i16 trunc v4i32 %inhi
+  //   %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16
+  //   %res = v8i8 trunc v8i16 %in16
+  //
+  // Without this transform, the original truncate would end up being
+  // scalarized, which is pretty much always a last resort.
+  SDValue InVec = N->getOperand(0);
+  EVT InVT = InVec->getValueType(0);
+  EVT OutVT = N->getValueType(0);
+  unsigned NumElements = OutVT.getVectorNumElements();
+  bool IsFloat = OutVT.isFloatingPoint();
+
+  // Widening should have already made sure this is a power-of-two vector
+  // if we're trying to split it at all. assert() that's true, just in case.
+  assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+
+  unsigned InElementSize = InVT.getScalarSizeInBits();
+  unsigned OutElementSize = OutVT.getScalarSizeInBits();
+
+  // Determine the split output VT. If it's legal we can just split directly.
+  EVT LoOutVT, HiOutVT;
+  std::tie(LoOutVT, HiOutVT) = DAG.GetSplitDestVTs(OutVT);
+  assert(LoOutVT == HiOutVT && "Unequal split?");
+
+  // If the input elements are only 1/2 the width of the result elements,
+  // just use the normal splitting. Our trick only works if there's room
+  // to split more than once.
+  if (isTypeLegal(LoOutVT) ||
+      InElementSize <= OutElementSize * 2)
+    return SplitVecOp_UnaryOp(N);
+  SDLoc DL(N);
+
+  // Don't touch if this will be scalarized.
+  EVT FinalVT = InVT;
+  while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
+    FinalVT = FinalVT.getHalfNumVectorElementsVT(*DAG.getContext());
+
+  if (getTypeAction(FinalVT) == TargetLowering::TypeScalarizeVector)
+    return SplitVecOp_UnaryOp(N);
+
+  // Get the split input vector.
+  SDValue InLoVec, InHiVec;
+  GetSplitVector(InVec, InLoVec, InHiVec);
+
+  // Truncate them to 1/2 the element size.
+  EVT HalfElementVT = IsFloat ?
+    EVT::getFloatingPointVT(InElementSize/2) :
+    EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
+  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
+                                NumElements/2);
+  SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
+  SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
+  // Concatenate them to get the full intermediate truncation result.
+  EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
+  SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
+                                 HalfHi);
+  // Now finish up by truncating all the way down to the original result
+  // type. This should normally be something that ends up being legal directly,
+  // but in theory if a target has very wide vectors and an annoyingly
+  // restricted set of legal types, this split can chain to build things up.
+  return IsFloat
+             ?
DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec, + DAG.getTargetConstant( + 0, DL, TLI.getPointerTy(DAG.getDataLayout()))) + : DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); +} + +SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + // The result has a legal vector type, but the input needs splitting. + SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes; + SDLoc DL(N); + GetSplitVector(N->getOperand(0), Lo0, Hi0); + GetSplitVector(N->getOperand(1), Lo1, Hi1); + unsigned PartElements = Lo0.getValueType().getVectorNumElements(); + EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements); + EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements); + + LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); + HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); + SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes); + + EVT OpVT = N->getOperand(0).getValueType(); + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + return DAG.getNode(ExtendCode, DL, N->getValueType(0), Con); +} + + +SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { + // The result has a legal vector type, but the input needs splitting. + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + SDLoc DL(N); + GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi); + EVT InVT = Lo.getValueType(); + + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), + InVT.getVectorNumElements()); + + if (N->isStrictFPOpcode()) { + Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other }, + { N->getOperand(0), Lo, N->getOperand(2) }); + Hi = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other }, + { N->getOperand(0), Hi, N->getOperand(2) }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + Lo.getValue(1), Hi.getValue(1)); + ReplaceValueWith(SDValue(N, 1), NewChain); + } else { + Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); + } + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) { + // The result (and the first input) has a legal vector type, but the second + // input needs splitting. + return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements()); +} + + +//===----------------------------------------------------------------------===// +// Result Vector Widening +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { + LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG); + dbgs() << "\n"); + + // See if the target wants to custom widen this node. 
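+  // If the hook fires, the target has already registered the widened result
+  // and nothing more needs to be done here.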
+ if (CustomWidenLowerNode(N, N->getValueType(ResNo))) + return; + + SDValue Res = SDValue(); + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "WidenVectorResult #" << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to widen the result of this operator!"); + + case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break; + case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; + case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; + case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; + case ISD::VSELECT: + case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; + case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; + case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; + case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: + Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); + break; + case ISD::MLOAD: + Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N)); + break; + case ISD::MGATHER: + Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N)); + break; + + case ISD::ADD: + case ISD::AND: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::OR: + case ISD::SUB: + case ISD::XOR: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINIMUM: + case ISD::FMAXIMUM: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: + case ISD::UADDSAT: + case ISD::SADDSAT: + case ISD::USUBSAT: + case ISD::SSUBSAT: + Res = WidenVecRes_Binary(N); + break; + + case ISD::FADD: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FSUB: + case ISD::FDIV: + case ISD::FREM: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + Res = WidenVecRes_BinaryCanTrap(N); + break; + + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: + // These are binary operations, but with an extra operand that shouldn't + // be widened (the scale). 
+ Res = WidenVecRes_BinaryWithExtraScalarOp(N); + break; + + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FREM: + case ISD::STRICT_FSQRT: + case ISD::STRICT_FMA: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + case ISD::STRICT_FMAXNUM: + case ISD::STRICT_FMINNUM: + case ISD::STRICT_FCEIL: + case ISD::STRICT_FFLOOR: + case ISD::STRICT_FROUND: + case ISD::STRICT_FTRUNC: + Res = WidenVecRes_StrictFP(N); + break; + + case ISD::UADDO: + case ISD::SADDO: + case ISD::USUBO: + case ISD::SSUBO: + case ISD::UMULO: + case ISD::SMULO: + Res = WidenVecRes_OverflowOp(N, ResNo); + break; + + case ISD::FCOPYSIGN: + Res = WidenVecRes_FCOPYSIGN(N); + break; + + case ISD::FPOWI: + Res = WidenVecRes_POWI(N); + break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + Res = WidenVecRes_Shift(N); + break; + + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + Res = WidenVecRes_EXTEND_VECTOR_INREG(N); + break; + + case ISD::ANY_EXTEND: + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: + case ISD::ZERO_EXTEND: + Res = WidenVecRes_Convert(N); + break; + + case ISD::STRICT_FP_EXTEND: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + Res = WidenVecRes_Convert_StrictFP(N); + break; + + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: + case ISD::FRINT: + case ISD::FROUND: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: { + // We're going to widen this vector op to a legal type by padding with undef + // elements. If the wide vector op is eventually going to be expanded to + // scalar libcalls, then unroll into scalar ops now to avoid unnecessary + // libcalls on the undef elements. + EVT VT = N->getValueType(0); + EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) && + TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) { + Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements()); + break; + } + } + // If the target has custom/legal support for the scalar FP intrinsic ops + // (they are probably not destined to become libcalls), then widen those like + // any other unary ops. + LLVM_FALLTHROUGH; + + case ISD::ABS: + case ISD::BITREVERSE: + case ISD::BSWAP: + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::FNEG: + case ISD::FCANONICALIZE: + Res = WidenVecRes_Unary(N); + break; + case ISD::FMA: + Res = WidenVecRes_Ternary(N); + break; + } + + // If Res is null, the sub-method took care of registering the result. + if (Res.getNode()) + SetWidenedVector(SDValue(N, ResNo), Res); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { + // Ternary op widening. 
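+ // For instance, on a hypothetical target whose narrowest legal type is
+ // v4f32, a v3f32 fma is rewritten in terms of the already-widened v4f32
+ // operands, whose low three lanes carry the payload:
+ //   %r4 = v4f32 fma %a4, %b4, %c4
+ // Only the low three lanes of %r4 are meaningful to the original users.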
+ SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + SDValue InOp3 = GetWidenedVector(N->getOperand(2)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { + // Binary op widening. + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) { + // Binary op widening, but with an extra operand that shouldn't be widened. + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + SDValue InOp3 = N->getOperand(2); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3, + N->getFlags()); +} + +// Given a vector of operations that have been broken up to widen, see +// if we can collect them together into the next widest legal VT. This +// implementation is trap-safe. +static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI, + SmallVectorImpl<SDValue> &ConcatOps, + unsigned ConcatEnd, EVT VT, EVT MaxVT, + EVT WidenVT) { + // Check to see if we have a single operation with the widen type. + if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } + + SDLoc dl(ConcatOps[0]); + EVT WidenEltVT = WidenVT.getVectorElementType(); + + // while (Some element of ConcatOps is not of type MaxVT) { + // From the end of ConcatOps, collect elements of the same type and put + // them into an op of the next larger supported type + // } + while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { + int Idx = ConcatEnd - 1; + VT = ConcatOps[Idx--].getValueType(); + while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) + Idx--; + + int NextSize = VT.isVector() ? 
VT.getVectorNumElements() : 1; + EVT NextVT; + do { + NextSize *= 2; + NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); + } while (!TLI.isTypeLegal(NextVT)); + + if (!VT.isVector()) { + // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT + SDValue VecOp = DAG.getUNDEF(NextVT); + unsigned NumToInsert = ConcatEnd - Idx - 1; + for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { + VecOp = DAG.getNode( + ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx], + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + ConcatOps[Idx+1] = VecOp; + ConcatEnd = Idx + 2; + } else { + // Vector type, create a CONCAT_VECTORS of type NextVT + SDValue undefVec = DAG.getUNDEF(VT); + unsigned OpsToConcat = NextSize/VT.getVectorNumElements(); + SmallVector<SDValue, 16> SubConcatOps(OpsToConcat); + unsigned RealVals = ConcatEnd - Idx - 1; + unsigned SubConcatEnd = 0; + unsigned SubConcatIdx = Idx + 1; + while (SubConcatEnd < RealVals) + SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx]; + while (SubConcatEnd < OpsToConcat) + SubConcatOps[SubConcatEnd++] = undefVec; + ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, + NextVT, SubConcatOps); + ConcatEnd = SubConcatIdx + 1; + } + } + + // Check to see if we have a single operation with the widen type. + if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } + + // add undefs of size MaxVT until ConcatOps grows to length of WidenVT + unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); + if (NumOps != ConcatEnd ) { + SDValue UndefVal = DAG.getUNDEF(MaxVT); + for (unsigned j = ConcatEnd; j < NumOps; ++j) + ConcatOps[j] = UndefVal; + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, + makeArrayRef(ConcatOps.data(), NumOps)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { + // Binary op widening for operations that can trap. + unsigned Opcode = N->getOpcode(); + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT WidenEltVT = WidenVT.getVectorElementType(); + EVT VT = WidenVT; + unsigned NumElts = VT.getVectorNumElements(); + const SDNodeFlags Flags = N->getFlags(); + while (!TLI.isTypeLegal(VT) && NumElts != 1) { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } + + if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { + // Operation doesn't trap so just widen as normal. + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags); + } + + // No legal vector version so unroll the vector operation and then widen. + if (NumElts == 1) + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); + + // Since the operation can trap, apply operation on the original vector. + EVT MaxVT = VT; + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); + + SmallVector<SDValue, 16> ConcatOps(CurNumElts); + unsigned ConcatEnd = 0; // Current ConcatOps index. + int Idx = 0; // Current Idx into input vectors. + + // NumElts := greatest legal vector size (at most WidenVT) + // while (orig. 
vector has unhandled elements) { + // take munches of size NumElts from the beginning and add to ConcatOps + // NumElts := next smaller supported vector size or 1 + // } + while (CurNumElts != 0) { + while (CurNumElts >= NumElts) { + SDValue EOp1 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp2 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags); + Idx += NumElts; + CurNumElts -= NumElts; + } + do { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } while (!TLI.isTypeLegal(VT) && NumElts != 1); + + if (NumElts == 1) { + for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { + SDValue EOp1 = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp2 = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, + EOp1, EOp2, Flags); + } + CurNumElts = 0; + } + } + + return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT); +} + +SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) { + // StrictFP op widening for operations that can trap. + unsigned NumOpers = N->getNumOperands(); + unsigned Opcode = N->getOpcode(); + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT WidenEltVT = WidenVT.getVectorElementType(); + EVT VT = WidenVT; + unsigned NumElts = VT.getVectorNumElements(); + while (!TLI.isTypeLegal(VT) && NumElts != 1) { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } + + // No legal vector version so unroll the vector operation and then widen. + if (NumElts == 1) + return UnrollVectorOp_StrictFP(N, WidenVT.getVectorNumElements()); + + // Since the operation can trap, apply operation on the original vector. + EVT MaxVT = VT; + SmallVector<SDValue, 4> InOps; + unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); + + SmallVector<SDValue, 16> ConcatOps(CurNumElts); + SmallVector<SDValue, 16> Chains; + unsigned ConcatEnd = 0; // Current ConcatOps index. + int Idx = 0; // Current Idx into input vectors. + + // The Chain is the first operand. + InOps.push_back(N->getOperand(0)); + + // Now process the remaining operands. + for (unsigned i = 1; i < NumOpers; ++i) { + SDValue Oper = N->getOperand(i); + + if (Oper.getValueType().isVector()) { + assert(Oper.getValueType() == N->getValueType(0) && + "Invalid operand type to widen!"); + Oper = GetWidenedVector(Oper); + } + + InOps.push_back(Oper); + } + + // NumElts := greatest legal vector size (at most WidenVT) + // while (orig. 
vector has unhandled elements) { + // take munches of size NumElts from the beginning and add to ConcatOps + // NumElts := next smaller supported vector size or 1 + // } + while (CurNumElts != 0) { + while (CurNumElts >= NumElts) { + SmallVector<SDValue, 4> EOps; + + for (unsigned i = 0; i < NumOpers; ++i) { + SDValue Op = InOps[i]; + + if (Op.getValueType().isVector()) + Op = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, Op, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + EOps.push_back(Op); + } + + EVT OperVT[] = {VT, MVT::Other}; + SDValue Oper = DAG.getNode(Opcode, dl, OperVT, EOps); + ConcatOps[ConcatEnd++] = Oper; + Chains.push_back(Oper.getValue(1)); + Idx += NumElts; + CurNumElts -= NumElts; + } + do { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } while (!TLI.isTypeLegal(VT) && NumElts != 1); + + if (NumElts == 1) { + for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { + SmallVector<SDValue, 4> EOps; + + for (unsigned i = 0; i < NumOpers; ++i) { + SDValue Op = InOps[i]; + + if (Op.getValueType().isVector()) + Op = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op, + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + + EOps.push_back(Op); + } + + EVT WidenVT[] = {WidenEltVT, MVT::Other}; + SDValue Oper = DAG.getNode(Opcode, dl, WidenVT, EOps); + ConcatOps[ConcatEnd++] = Oper; + Chains.push_back(Oper.getValue(1)); + } + CurNumElts = 0; + } + } + + // Build a factor node to remember all the Ops that have been created. + SDValue NewChain; + if (Chains.size() == 1) + NewChain = Chains[0]; + else + NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + ReplaceValueWith(SDValue(N, 1), NewChain); + + return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT); +} + +SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) { + SDLoc DL(N); + EVT ResVT = N->getValueType(0); + EVT OvVT = N->getValueType(1); + EVT WideResVT, WideOvVT; + SDValue WideLHS, WideRHS; + + // TODO: This might result in a widen/split loop. + if (ResNo == 0) { + WideResVT = TLI.getTypeToTransformTo(*DAG.getContext(), ResVT); + WideOvVT = EVT::getVectorVT( + *DAG.getContext(), OvVT.getVectorElementType(), + WideResVT.getVectorNumElements()); + + WideLHS = GetWidenedVector(N->getOperand(0)); + WideRHS = GetWidenedVector(N->getOperand(1)); + } else { + WideOvVT = TLI.getTypeToTransformTo(*DAG.getContext(), OvVT); + WideResVT = EVT::getVectorVT( + *DAG.getContext(), ResVT.getVectorElementType(), + WideOvVT.getVectorNumElements()); + + SDValue Zero = DAG.getConstant( + 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())); + WideLHS = DAG.getNode( + ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT), + N->getOperand(0), Zero); + WideRHS = DAG.getNode( + ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT), + N->getOperand(1), Zero); + } + + SDVTList WideVTs = DAG.getVTList(WideResVT, WideOvVT); + SDNode *WideNode = DAG.getNode( + N->getOpcode(), DL, WideVTs, WideLHS, WideRHS).getNode(); + + // Replace the other vector result not being explicitly widened here. 
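+ // For example (hypothetical types), if ResNo 0 of a v2i32/v2i1 uaddo was
+ // widened to v4i32/v4i1, the v2i1 overflow value is recovered from the
+ // wide node: either it is itself due to be widened, so the wide value is
+ // registered directly, or it is extracted back down, conceptually:
+ //   %ov = v2i1 extract_subvector %wide.ov, 0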
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) {
+ SetWidenedVector(SDValue(N, OtherNo), SDValue(WideNode, OtherNo));
+ } else {
+ SDValue Zero = DAG.getConstant(
+ 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue OtherVal = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, OtherVT, SDValue(WideNode, OtherNo), Zero);
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+
+ return SDValue(WideNode, ResNo);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ SDLoc DL(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+ unsigned Opcode = N->getOpcode();
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+ const SDNodeFlags Flags = N->getFlags();
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(N->getOperand(0));
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts) {
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
+ }
+ if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) {
+ // If both input and result vector types are of the same width, extend
+ // operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which
+ // accepts fewer elements in the result than in the input.
+ if (Opcode == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
+ if (Opcode == ISD::SIGN_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
+ if (Opcode == ISD::ZERO_EXTEND)
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
+ }
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
+ Ops[0] = InOp;
+ SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVec);
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+ SDValue InVal = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ // Extract the input and convert the shortened input vector.
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVal);
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags);
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ // Use the original element count so we don't do more scalar ops than
+ // necessary.
+ unsigned MinElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i=0; i < MinElts; ++i) {
+ SDValue Val = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ if (N->getNumOperands() == 1)
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
+ else
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
+ }
+
+ return DAG.getBuildVector(WidenVT, DL, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
+ SDValue InOp = N->getOperand(1);
+ SDLoc DL(N);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<EVT, 2> WidenVTs = { WidenVT, MVT::Other };
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+
+ unsigned Opcode = N->getOpcode();
+
+ // FIXME: Optimizations need to be implemented here.
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ SmallVector<EVT, 2> EltVTs = { EltVT, MVT::Other };
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 32> OpChains;
+ // Use the original element count so we don't do more scalar ops than
+ // necessary.
+ unsigned MinElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i=0; i < MinElts; ++i) {
+ NewOps[1] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps);
+ OpChains.push_back(Ops[i].getValue(1));
+ }
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OpChains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return DAG.getBuildVector(WidenVT, DL, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue InOp = N->getOperand(0);
+ SDLoc DL(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenSVT = WidenVT.getVectorElementType();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InSVT = InVT.getVectorElementType();
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) {
+ switch (Opcode) {
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ }
+ }
+ }
+
+ // Unroll, extend the scalars and rebuild the vector.
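+ // E.g. (v2i64 sign_extend_vector_inreg (v4i32 %x)) whose result widens to
+ // v4i64 (hypothetical types) would become, roughly:
+ //   %e[i] = i32 extract_vector_elt %x, i      ; for i = 0..3
+ //   %s[i] = i64 sign_extend %e[i]
+ //   %r    = v4i64 build_vector %s0, %s1, %s2, %s3
+ // Only the low two lanes are meaningful to the original node's users.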
+ SmallVector<SDValue, 16> Ops; + for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) { + SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InSVT, InOp, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + switch (Opcode) { + case ISD::ANY_EXTEND_VECTOR_INREG: + Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val); + break; + case ISD::SIGN_EXTEND_VECTOR_INREG: + Val = DAG.getNode(ISD::SIGN_EXTEND, DL, WidenSVT, Val); + break; + case ISD::ZERO_EXTEND_VECTOR_INREG: + Val = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenSVT, Val); + break; + default: + llvm_unreachable("A *_EXTEND_VECTOR_INREG node was expected"); + } + Ops.push_back(Val); + } + + while (Ops.size() != WidenNumElts) + Ops.push_back(DAG.getUNDEF(WidenSVT)); + + return DAG.getBuildVector(WidenVT, DL, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { + // If this is an FCOPYSIGN with same input types, we can treat it as a + // normal (can trap) binary op. + if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType()) + return WidenVecRes_BinaryCanTrap(N); + + // If the types are different, fall back to unrolling. + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); +} + +SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + SDValue ShOp = N->getOperand(1); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + SDValue ShOp = N->getOperand(1); + + EVT ShVT = ShOp.getValueType(); + if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) { + ShOp = GetWidenedVector(ShOp); + ShVT = ShOp.getValueType(); + } + EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), + ShVT.getVectorElementType(), + WidenVT.getVectorNumElements()); + if (ShVT != ShWidenVT) + ShOp = ModifyToType(ShOp, ShWidenVT); + + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { + // Unary op widening. 
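+ // The operand is known to widen to the same number of elements as the
+ // result, so e.g. a v3f32 fabs on a hypothetical target where only v4f32
+ // is legal simply becomes (v4f32 fabs %widened.x); the padding lanes are
+ // computed but never observed.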
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), + cast<VTSDNode>(N->getOperand(1))->getVT() + .getVectorElementType(), + WidenVT.getVectorNumElements()); + SDValue WidenLHS = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + WidenVT, WidenLHS, DAG.getValueType(ExtVT)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { + SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo); + return GetWidenedVector(WidenVec); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDLoc dl(N); + + switch (getTypeAction(InVT)) { + case TargetLowering::TypeLegal: + break; + case TargetLowering::TypePromoteInteger: + // If the incoming type is a vector that is being promoted, then + // we know that the elements are arranged differently and that we + // must perform the conversion using a stack slot. + if (InVT.isVector()) + break; + + // If the InOp is promoted to the same size, convert it. Otherwise, + // fall out of the switch and widen the promoted input. + InOp = GetPromotedInteger(InOp); + InVT = InOp.getValueType(); + if (WidenVT.bitsEq(InVT)) + return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); + break; + case TargetLowering::TypeSoftenFloat: + case TargetLowering::TypePromoteFloat: + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: + case TargetLowering::TypeScalarizeVector: + case TargetLowering::TypeSplitVector: + break; + case TargetLowering::TypeWidenVector: + // If the InOp is widened to the same size, convert it. Otherwise, fall + // out of the switch and widen the widened input. + InOp = GetWidenedVector(InOp); + InVT = InOp.getValueType(); + if (WidenVT.bitsEq(InVT)) + // The input widens to the same size. Convert to the widen value. + return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); + break; + } + + unsigned WidenSize = WidenVT.getSizeInBits(); + unsigned InSize = InVT.getSizeInBits(); + // x86mmx is not an acceptable vector element type, so don't try. + if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) { + // Determine new input vector type. The new input vector type will use + // the same element type (if its a vector) or use the input type as a + // vector. It is the same size as the type to widen to. + EVT NewInVT; + unsigned NewNumElts = WidenSize / InSize; + if (InVT.isVector()) { + EVT InEltVT = InVT.getVectorElementType(); + NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, + WidenSize / InEltVT.getSizeInBits()); + } else { + NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); + } + + if (TLI.isTypeLegal(NewInVT)) { + SDValue NewVec; + if (InVT.isVector()) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. 
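+ // E.g. bitcasting v2i32 to v4i16 when v4i16 widens to v8i16 (hypothetical
+ // types): NewInVT is v4i32 here, so the input is padded with one undef
+ // v2i32 and a single wide bitcast is emitted:
+ //   %w = v4i32 concat_vectors %x, undef
+ //   %r = v8i16 bitcast %w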
+ SmallVector<SDValue, 16> Ops(NewNumElts, DAG.getUNDEF(InVT));
+ Ops[0] = InOp;
+
+ NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
+ } else {
+ NewVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewInVT, InOp);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+ SDLoc dl(N);
+ // Build a vector with undefined values for the new elements.
+ EVT VT = N->getValueType(0);
+
+ // Integer BUILD_VECTOR operands may be larger than the node's vector element
+ // type. The UNDEFs need to have the same type as the existing operands.
+ EVT EltVT = N->getOperand(0).getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+ assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
+ NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT));
+
+ return DAG.getBuildVector(WidenVT, dl, NewOps);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+ EVT InVT = N->getOperand(0).getValueType();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumInElts = InVT.getVectorNumElements();
+ unsigned NumOperands = N->getNumOperands();
+
+ bool InputWidened = false; // Indicates we need to widen the input.
+ if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
+ if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+ // Add undef vectors to widen to correct length.
+ unsigned NumConcat = WidenVT.getVectorNumElements() /
+ InVT.getVectorNumElements();
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ for (unsigned i=0; i < NumOperands; ++i)
+ Ops[i] = N->getOperand(i);
+ for (unsigned i = NumOperands; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops);
+ }
+ } else {
+ InputWidened = true;
+ if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+ // The inputs and the result are widened to the same type.
+ unsigned i;
+ for (i=1; i < NumOperands; ++i)
+ if (!N->getOperand(i).isUndef())
+ break;
+
+ if (i == NumOperands)
+ // Everything but the first operand is an UNDEF so just return the
+ // widened first operand.
+ return GetWidenedVector(N->getOperand(0));
+
+ if (NumOperands == 2) {
+ // Replace concat of two operands with a shuffle.
+ SmallVector<int, 16> MaskOps(WidenNumElts, -1);
+ for (unsigned i = 0; i < NumInElts; ++i) {
+ MaskOps[i] = i;
+ MaskOps[i + NumInElts] = i + WidenNumElts;
+ }
+ return DAG.getVectorShuffle(WidenVT, dl,
+ GetWidenedVector(N->getOperand(0)),
+ GetWidenedVector(N->getOperand(1)),
+ MaskOps);
+ }
+ }
+ }
+
+ // Fall back to using extracts and a build vector.
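+ // E.g. a (v2i32 concat_vectors %a, %b) with v1i32 operands reaching this
+ // point while widening to v4i32 (hypothetical types) could become:
+ //   %e0 = i32 extract_vector_elt %a, 0
+ //   %e1 = i32 extract_vector_elt %b, 0
+ //   %r  = v4i32 build_vector %e0, %e1, undef, undef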
+ EVT EltVT = WidenVT.getVectorElementType(); + SmallVector<SDValue, 16> Ops(WidenNumElts); + unsigned Idx = 0; + for (unsigned i=0; i < NumOperands; ++i) { + SDValue InOp = N->getOperand(i); + if (InputWidened) + InOp = GetWidenedVector(InOp); + for (unsigned j=0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; Idx < WidenNumElts; ++Idx) + Ops[Idx] = UndefVal; + return DAG.getBuildVector(WidenVT, dl, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SDValue InOp = N->getOperand(0); + SDValue Idx = N->getOperand(1); + SDLoc dl(N); + + if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) + InOp = GetWidenedVector(InOp); + + EVT InVT = InOp.getValueType(); + + // Check if we can just return the input vector after widening. + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + if (IdxVal == 0 && InVT == WidenVT) + return InOp; + + // Check if we can extract from the vector. + unsigned InNumElts = InVT.getVectorNumElements(); + if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); + + // We could try widening the input to the right length but for now, extract + // the original elements, fill the rest with undefs and build a vector. + SmallVector<SDValue, 16> Ops(WidenNumElts); + EVT EltVT = VT.getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned i; + for (i=0; i < NumElts; ++i) + Ops[i] = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(IdxVal + i, dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + return DAG.getBuildVector(WidenVT, dl, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), + InOp.getValueType(), InOp, + N->getOperand(1), N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); + ISD::LoadExtType ExtType = LD->getExtensionType(); + + SDValue Result; + SmallVector<SDValue, 16> LdChain; // Chain for the series of load + if (ExtType != ISD::NON_EXTLOAD) + Result = GenWidenVectorExtLoads(LdChain, LD, ExtType); + else + Result = GenWidenVectorLoads(LdChain, LD); + + // If we generate a single load, we can use that for the chain. Otherwise, + // build a factor node to remember the multiple loads are independent and + // chain to that. + SDValue NewChain; + if (LdChain.size() == 1) + NewChain = LdChain[0]; + else + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); + + // Modified the chain - switch anything that used the old chain to use + // the new one. 
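+ // E.g. if a v3i32 load (hypothetical type) was widened into two smaller
+ // loads, their chains are merged:
+ //   %ch = TokenFactor %ld0.chain, %ld1.chain
+ // and every user of the original load's chain is rewired to %ch, which
+ // preserves ordering against later memory operations.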
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Mask = N->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDValue PassThru = GetWidenedVector(N->getPassThru());
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ SDLoc dl(N);
+
+ // The mask should be widened as well.
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+
+ SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
+ Mask, PassThru, N->getMemoryVT(),
+ N->getMemOperand(), ExtType,
+ N->isExpandingLoad());
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
+
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Mask = N->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDValue PassThru = GetWidenedVector(N->getPassThru());
+ SDValue Scale = N->getScale();
+ unsigned NumElts = WideVT.getVectorNumElements();
+ SDLoc dl(N);
+
+ // The mask should be widened as well.
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WideVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+
+ // Widen the Index operand.
+ SDValue Index = N->getIndex();
+ EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ Index.getValueType().getScalarType(),
+ NumElts);
+ Index = ModifyToType(Index, WideIndexVT);
+ SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
+ Scale };
+ SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
+ N->getMemoryVT(), dl, Ops,
+ N->getMemOperand(), N->getIndexType());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N),
+ WidenVT, N->getOperand(0));
+}
+
+// Return true if this is a node that could have two SETCCs as operands.
+static inline bool isLogicalMaskOp(unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return true;
+ }
+ return false;
+}
+
+// This is used just for the assert in convertMask(). Check that this is
+// either a SETCC or a SETCC previously handled by convertMask().
+#ifndef NDEBUG
+static inline bool isSETCCorConvertedSETCC(SDValue N) {
+ if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ N = N.getOperand(0);
+ else if (N.getOpcode() == ISD::CONCAT_VECTORS) {
+ for (unsigned i = 1; i < N->getNumOperands(); ++i)
+ if (!N->getOperand(i)->isUndef())
+ return false;
+ N = N.getOperand(0);
+ }
+
+ if (N.getOpcode() == ISD::TRUNCATE)
+ N = N.getOperand(0);
+ else if (N.getOpcode() == ISD::SIGN_EXTEND)
+ N = N.getOperand(0);
+
+ if (isLogicalMaskOp(N.getOpcode()))
+ return isSETCCorConvertedSETCC(N.getOperand(0)) &&
+ isSETCCorConvertedSETCC(N.getOperand(1));
+
+ return (N.getOpcode() == ISD::SETCC ||
+ ISD::isBuildVectorOfConstantSDNodes(N.getNode()));
+}
+#endif
+
+// Return a mask of vector type MaskVT to replace InMask. 
Also adjust MaskVT
+// to ToMaskVT if needed with vector extension or truncation.
+SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
+ EVT ToMaskVT) {
+ // Currently only a SETCC or an AND/OR/XOR with two SETCCs is handled.
+ // FIXME: This code seems to be too restrictive, we might consider
+ // generalizing it or dropping it.
+ assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument.");
+
+ // Make a new Mask node, with a legal result VT.
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i)
+ Ops.push_back(InMask->getOperand(i));
+ SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
+
+ // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign
+ // extend or truncate is needed.
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned MaskScalarBits = MaskVT.getScalarSizeInBits();
+ unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits();
+ if (MaskScalarBits < ToMaskScalBits) {
+ EVT ExtVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
+ MaskVT.getVectorNumElements());
+ Mask = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Mask), ExtVT, Mask);
+ } else if (MaskScalarBits > ToMaskScalBits) {
+ EVT TruncVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
+ MaskVT.getVectorNumElements());
+ Mask = DAG.getNode(ISD::TRUNCATE, SDLoc(Mask), TruncVT, Mask);
+ }
+
+ assert(Mask->getValueType(0).getScalarSizeInBits() ==
+ ToMaskVT.getScalarSizeInBits() &&
+ "Mask should have the right element size by now.");
+
+ // Adjust Mask to the right number of elements.
+ unsigned CurrMaskNumEls = Mask->getValueType(0).getVectorNumElements();
+ if (CurrMaskNumEls > ToMaskVT.getVectorNumElements()) {
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, SDLoc(Mask), IdxTy);
+ Mask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Mask), ToMaskVT, Mask,
+ ZeroIdx);
+ } else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
+ unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls);
+ EVT SubVT = Mask->getValueType(0);
+ SmallVector<SDValue, 16> SubOps(NumSubVecs, DAG.getUNDEF(SubVT));
+ SubOps[0] = Mask;
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubOps);
+ }
+
+ assert((Mask->getValueType(0) == ToMaskVT) &&
+ "A mask of ToMaskVT should have been produced by now.");
+
+ return Mask;
+}
+
+// This method tries to handle VSELECT and its mask by legalizing operands
+// (which may require widening) and, if needed, adjusting the mask vector type
+// to match that of the VSELECT. Without it, many cases end up with
+// scalarization of the SETCC, with many unnecessary instructions.
+SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Cond = N->getOperand(0);
+
+ if (N->getOpcode() != ISD::VSELECT)
+ return SDValue();
+
+ if (Cond->getOpcode() != ISD::SETCC && !isLogicalMaskOp(Cond->getOpcode()))
+ return SDValue();
+
+ // If this is a split VSELECT that was already handled previously, do
+ // nothing.
+ EVT CondVT = Cond->getValueType(0);
+ if (CondVT.getScalarSizeInBits() != 1)
+ return SDValue();
+
+ EVT VSelVT = N->getValueType(0);
+ // Only handle vector types which are a power of 2.
+ if (!isPowerOf2_64(VSelVT.getSizeInBits()))
+ return SDValue();
+
+ // Don't touch if this will be scalarized.
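+ // Follow the split action to its end point: e.g. a v2f64 VSELECT on a
+ // hypothetical target with no legal vector types splits down to a single
+ // element and is then scalarized, and the mask trick below buys nothing
+ // there.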
+ EVT FinalVT = VSelVT; + while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector) + FinalVT = FinalVT.getHalfNumVectorElementsVT(Ctx); + + if (FinalVT.getVectorNumElements() == 1) + return SDValue(); + + // If there is support for an i1 vector mask, don't touch. + if (Cond.getOpcode() == ISD::SETCC) { + EVT SetCCOpVT = Cond->getOperand(0).getValueType(); + while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal) + SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT); + EVT SetCCResVT = getSetCCResultType(SetCCOpVT); + if (SetCCResVT.getScalarSizeInBits() == 1) + return SDValue(); + } else if (CondVT.getScalarType() == MVT::i1) { + // If there is support for an i1 vector mask (or only scalar i1 conditions), + // don't touch. + while (TLI.getTypeAction(Ctx, CondVT) != TargetLowering::TypeLegal) + CondVT = TLI.getTypeToTransformTo(Ctx, CondVT); + + if (CondVT.getScalarType() == MVT::i1) + return SDValue(); + } + + // Get the VT and operands for VSELECT, and widen if needed. + SDValue VSelOp1 = N->getOperand(1); + SDValue VSelOp2 = N->getOperand(2); + if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector) { + VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT); + VSelOp1 = GetWidenedVector(VSelOp1); + VSelOp2 = GetWidenedVector(VSelOp2); + } + + // The mask of the VSELECT should have integer elements. + EVT ToMaskVT = VSelVT; + if (!ToMaskVT.getScalarType().isInteger()) + ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger(); + + SDValue Mask; + if (Cond->getOpcode() == ISD::SETCC) { + EVT MaskVT = getSetCCResultType(Cond.getOperand(0).getValueType()); + Mask = convertMask(Cond, MaskVT, ToMaskVT); + } else if (isLogicalMaskOp(Cond->getOpcode()) && + Cond->getOperand(0).getOpcode() == ISD::SETCC && + Cond->getOperand(1).getOpcode() == ISD::SETCC) { + // Cond is (AND/OR/XOR (SETCC, SETCC)) + SDValue SETCC0 = Cond->getOperand(0); + SDValue SETCC1 = Cond->getOperand(1); + EVT VT0 = getSetCCResultType(SETCC0.getOperand(0).getValueType()); + EVT VT1 = getSetCCResultType(SETCC1.getOperand(0).getValueType()); + unsigned ScalarBits0 = VT0.getScalarSizeInBits(); + unsigned ScalarBits1 = VT1.getScalarSizeInBits(); + unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits(); + EVT MaskVT; + // If the two SETCCs have different VTs, either extend/truncate one of + // them to the other "towards" ToMaskVT, or truncate one and extend the + // other to ToMaskVT. + if (ScalarBits0 != ScalarBits1) { + EVT NarrowVT = ((ScalarBits0 < ScalarBits1) ? VT0 : VT1); + EVT WideVT = ((NarrowVT == VT0) ? VT1 : VT0); + if (ScalarBits_ToMask >= WideVT.getScalarSizeInBits()) + MaskVT = WideVT; + else if (ScalarBits_ToMask <= NarrowVT.getScalarSizeInBits()) + MaskVT = NarrowVT; + else + MaskVT = ToMaskVT; + } else + // If the two SETCCs have the same VT, don't change it. + MaskVT = VT0; + + // Make new SETCCs and logical nodes. + SETCC0 = convertMask(SETCC0, VT0, MaskVT); + SETCC1 = convertMask(SETCC1, VT1, MaskVT); + Cond = DAG.getNode(Cond->getOpcode(), SDLoc(Cond), MaskVT, SETCC0, SETCC1); + + // Convert the logical op for VSELECT if needed. 
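+ // E.g. (hypothetical types) with Cond = (and (setcc v4i32 ...),
+ // (setcc v4i64 ...)) feeding a v4f32 VSELECT: the two SETCCs were rebuilt
+ // at a common MaskVT above; the final step below converts that combined
+ // mask to the i32-element mask type the VSELECT expects.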
+ Mask = convertMask(Cond, MaskVT, ToMaskVT);
+ } else
+ return SDValue();
+
+ return DAG.getNode(ISD::VSELECT, SDLoc(N), VSelVT, Mask, VSelOp1, VSelOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue Cond1 = N->getOperand(0);
+ EVT CondVT = Cond1.getValueType();
+ if (CondVT.isVector()) {
+ if (SDValue Res = WidenVSELECTAndMask(N))
+ return Res;
+
+ EVT CondEltVT = CondVT.getVectorElementType();
+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ CondEltVT, WidenNumElts);
+ if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
+ Cond1 = GetWidenedVector(Cond1);
+
+ // If we have to split the condition there is no point in widening the
+ // select. This would result in a cycle of widening the select ->
+ // widening the condition operand -> splitting the condition operand ->
+ // splitting the select -> widening the select. Instead split this select
+ // further and widen the resulting type.
+ if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) {
+ SDValue SplitSelect = SplitVecOp_VSELECT(N, 0);
+ SDValue Res = ModifyToType(SplitSelect, WidenVT);
+ return Res;
+ }
+
+ if (Cond1.getValueType() != CondWidenVT)
+ Cond1 = ModifyToType(Cond1, CondWidenVT);
+ }
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+ assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
+ InOp1.getValueType(), N->getOperand(0),
+ N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Adjust mask based on new input vector length.
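+ // E.g. a v2f32 shuffle with mask <1,3> whose operands widen to v4f32
+ // (hypothetical types): element 1 of the second input, encoded as
+ // 3 = NumElts + 1, is re-encoded as WidenNumElts + 1 = 5, and the mask is
+ // padded with "don't care" lanes, giving <1, 5, -1, -1>.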
+ SmallVector<int, 16> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx < (int)NumElts)
+ NewMask.push_back(Idx);
+ else
+ NewMask.push_back(Idx - NumElts + WidenNumElts);
+ }
+ for (unsigned i = NumElts; i != WidenNumElts; ++i)
+ NewMask.push_back(-1);
+ return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operands must be vectors");
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = N->getOperand(0);
+ EVT InVT = InOp1.getValueType();
+ assert(InVT.isVector() && "can not widen non-vector type");
+ EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), WidenNumElts);
+
+ // The input and output types often differ here, and it could be that while
+ // we'd prefer to widen the result type, the input operands have been split.
+ // In this case, we need to split the result of this node as well.
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
+ SDValue SplitVSetCC = SplitVecOp_VSETCC(N);
+ SDValue Res = ModifyToType(SplitVSetCC, WidenVT);
+ return Res;
+ }
+
+ // If the inputs also widen, handle them directly. Otherwise widen by hand.
+ SDValue InOp2 = N->getOperand(1);
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp1 = GetWidenedVector(InOp1);
+ InOp2 = GetWidenedVector(InOp2);
+ } else {
+ InOp1 = DAG.WidenVector(InOp1, SDLoc(N));
+ InOp2 = DAG.WidenVector(InOp2, SDLoc(N));
+ }
+
+ // Assume that the input and output will be widened appropriately. If not,
+ // we will have to unroll it at some point.
+ assert(InOp1.getValueType() == WidenInVT &&
+ InOp2.getValueType() == WidenInVT &&
+ "Input not widened to expected type!");
+ (void)WidenInVT;
+ return DAG.getNode(ISD::SETCC, SDLoc(N),
+ WidenVT, InOp1, InOp2, N->getOperand(2));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Widen Vector Operand
+//===----------------------------------------------------------------------===//
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom widen this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "WidenVectorOperand op #" << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to widen this operator's operand!"); + + case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::STORE: Res = WidenVecOp_STORE(N); break; + case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; + case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break; + case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; + case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break; + case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break; + + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + Res = WidenVecOp_EXTEND(N); + break; + + case ISD::FP_EXTEND: + case ISD::STRICT_FP_EXTEND: + case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::TRUNCATE: + Res = WidenVecOp_Convert(N); + break; + + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + Res = WidenVecOp_VECREDUCE(N); + break; + } + + // If Res is null, the sub-method took care of registering the result. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + + if (N->isStrictFPOpcode()) + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 && + "Invalid operand expansion"); + else + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + SDValue InOp = N->getOperand(0); + assert(getTypeAction(InOp.getValueType()) == + TargetLowering::TypeWidenVector && + "Unexpected type action"); + InOp = GetWidenedVector(InOp); + assert(VT.getVectorNumElements() < + InOp.getValueType().getVectorNumElements() && + "Input wasn't widened!"); + + // We may need to further widen the operand until it has the same total + // vector size as the result. 
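+ // E.g. for (v4i32 zero_extend (v4i8 %x)) on a hypothetical target with
+ // 128-bit vectors where %x widened to v8i8 (64 bits): the result needs
+ // 128 bits, so the search below settles on v16i8 and re-widens via
+ //   %w = v16i8 insert_subvector undef, %x8, 0
+ // after which (v4i32 zero_extend_vector_inreg %w) produces the result.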
+ EVT InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() != VT.getSizeInBits()) {
+ EVT InEltVT = InVT.getVectorElementType();
+ for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) {
+ EVT FixedVT = (MVT::SimpleValueType)i;
+ EVT FixedEltVT = FixedVT.getVectorElementType();
+ if (TLI.isTypeLegal(FixedVT) &&
+ FixedVT.getSizeInBits() == VT.getSizeInBits() &&
+ FixedEltVT == InEltVT) {
+ assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() &&
+ "Not enough elements in the fixed type for the operand!");
+ assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
+ "We can't have the same type as we started with!");
+ if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
+ InOp = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ else
+ InOp = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ break;
+ }
+ }
+ InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() != VT.getSizeInBits())
+ // We couldn't find a legal vector type that was a widening of the input
+ // and could be extended in-register to the result type, so we have to
+ // scalarize.
+ return WidenVecOp_Convert(N);
+ }
+
+ // Use special DAG nodes to represent the operation of extending the
+ // low lanes.
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Extend legalization on extend operation!");
+ case ISD::ANY_EXTEND:
+ return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, InOp);
+ case ISD::SIGN_EXTEND:
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, InOp);
+ case ISD::ZERO_EXTEND:
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, InOp);
+ }
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
+ // The result (and first input) is legal, but the second input is illegal.
+ // We can't do much to fix that, so just unroll and let the extracts off of
+ // the second input be widened as needed later.
+ return DAG.UnrollVectorOp(N);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
+ // The result is legal, but the input is illegal.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ SDLoc dl(N);
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
+ assert(getTypeAction(InOp.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unexpected type action");
+ InOp = GetWidenedVector(InOp);
+ EVT InVT = InOp.getValueType();
+ unsigned Opcode = N->getOpcode();
+
+ // See if a widened result type would be legal, if so widen the node.
+ // FIXME: This isn't safe for StrictFP. Further optimization is needed here.
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ InVT.getVectorNumElements());
+ if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
+ SDValue Res;
+ if (N->isStrictFPOpcode()) {
+ Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
+ { N->getOperand(0), InOp });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + } else + Res = DAG.getNode(Opcode, dl, WideVT, InOp); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, Res, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + + EVT InEltVT = InVT.getVectorElementType(); + + // Unroll the convert into some scalar code and create a nasty build vector. + SmallVector<SDValue, 16> Ops(NumElts); + if (N->isStrictFPOpcode()) { + SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + SmallVector<SDValue, 32> OpChains; + for (unsigned i=0; i < NumElts; ++i) { + NewOps[1] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps); + OpChains.push_back(Ops[i].getValue(1)); + } + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains); + ReplaceValueWith(SDValue(N, 1), NewChain); + } else { + for (unsigned i = 0; i < NumElts; ++i) + Ops[i] = DAG.getNode( + Opcode, dl, EltVT, + DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + } + + return DAG.getBuildVector(VT, dl, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { + EVT VT = N->getValueType(0); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + EVT InWidenVT = InOp.getValueType(); + SDLoc dl(N); + + // Check if we can convert between two legal vector types and extract. + unsigned InWidenSize = InWidenVT.getSizeInBits(); + unsigned Size = VT.getSizeInBits(); + // x86mmx is not an acceptable vector element type, so don't try. + if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) { + unsigned NewNumElts = InWidenSize / Size; + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); + if (TLI.isTypeLegal(NewVT)) { + SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); + return DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + } + + // Handle a case like bitcast v12i8 -> v3i32. Normally that would get widened + // to v16i8 -> v4i32, but for a target where v3i32 is legal but v12i8 is not, + // we end up here. Handling the case here with EXTRACT_SUBVECTOR avoids + // having to copy via memory. + if (VT.isVector()) { + EVT EltVT = VT.getVectorElementType(); + unsigned EltSize = EltVT.getSizeInBits(); + if (InWidenSize % EltSize == 0) { + unsigned NewNumElts = InWidenSize / EltSize; + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts); + if (TLI.isTypeLegal(NewVT)) { + SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, BitOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + } + } + + return CreateStackStoreLoad(InOp, VT); +} + +SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + EVT InVT = N->getOperand(0).getValueType(); + SDLoc dl(N); + + // If the widen width for this operand is the same as the width of the concat + // and all but the first operand is undef, just use the widened operand. 
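Both paths of the CONCAT_VECTORS handling below can be pictured with a scalar model: the fast path notices that the widened first operand already is the result, and the fallback gathers every real element of every operand into one flat vector. A minimal sketch of the fallback, with plain std::vector standing in for DAG nodes:

#include <vector>

// Scalar model of the CONCAT_VECTORS fallback: pull each operand's real
// elements (NumInElts of them) out of its widened form and append them in
// order, which is what the EXTRACT_VECTOR_ELT/BUILD_VECTOR pair produces.
template <typename T>
std::vector<T> concatWidenedOperands(
    const std::vector<std::vector<T>> &Widened, unsigned NumInElts) {
  std::vector<T> Out;
  for (const auto &Op : Widened)
    for (unsigned j = 0; j != NumInElts; ++j)
      Out.push_back(Op[j]); // ignore the padding lanes past NumInElts
  return Out;
}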
+ unsigned NumOperands = N->getNumOperands(); + if (VT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) { + unsigned i; + for (i = 1; i < NumOperands; ++i) + if (!N->getOperand(i).isUndef()) + break; + + if (i == NumOperands) + return GetWidenedVector(N->getOperand(0)); + } + + // Otherwise, fall back to a nasty build vector. + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(NumElts); + + unsigned NumInElts = InVT.getVectorNumElements(); + + unsigned Idx = 0; + for (unsigned i=0; i < NumOperands; ++i) { + SDValue InOp = N->getOperand(i); + assert(getTypeAction(InOp.getValueType()) == + TargetLowering::TypeWidenVector && + "Unexpected type action"); + InOp = GetWidenedVector(InOp); + for (unsigned j=0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + return DAG.getBuildVector(VT, dl, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), + N->getValueType(0), InOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), + N->getValueType(0), InOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { + // We have to widen the value, but we want only to store the original + // vector type. + StoreSDNode *ST = cast<StoreSDNode>(N); + + if (!ST->getMemoryVT().getScalarType().isByteSized()) + return TLI.scalarizeVectorStore(ST, DAG); + + SmallVector<SDValue, 16> StChain; + if (ST->isTruncatingStore()) + GenWidenVectorTruncStores(StChain, ST); + else + GenWidenVectorStores(StChain, ST); + + if (StChain.size() == 1) + return StChain[0]; + else + return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); +} + +SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { + assert((OpNo == 1 || OpNo == 3) && + "Can widen only data or mask operand of mstore"); + MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); + SDValue Mask = MST->getMask(); + EVT MaskVT = Mask.getValueType(); + SDValue StVal = MST->getValue(); + SDLoc dl(N); + + if (OpNo == 1) { + // Widen the value. + StVal = GetWidenedVector(StVal); + + // The mask should be widened as well. + EVT WideVT = StVal.getValueType(); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), + MaskVT.getVectorElementType(), + WideVT.getVectorNumElements()); + Mask = ModifyToType(Mask, WideMaskVT, true); + } else { + // Widen the mask. 
+    EVT WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT);
+    Mask = ModifyToType(Mask, WideMaskVT, true);
+
+    EVT ValueVT = StVal.getValueType();
+    EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
+                                  ValueVT.getVectorElementType(),
+                                  WideMaskVT.getVectorNumElements());
+    StVal = ModifyToType(StVal, WideVT);
+  }
+
+  assert(Mask.getValueType().getVectorNumElements() ==
+         StVal.getValueType().getVectorNumElements() &&
+         "Mask and data vectors should have the same number of elements");
+  return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(),
+                            Mask, MST->getMemoryVT(), MST->getMemOperand(),
+                            false, MST->isCompressingStore());
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 4 && "Can widen only the index of mgather");
+  auto *MG = cast<MaskedGatherSDNode>(N);
+  SDValue DataOp = MG->getPassThru();
+  SDValue Mask = MG->getMask();
+  SDValue Scale = MG->getScale();
+
+  // Just widen the index. It's allowed to have extra elements.
+  SDValue Index = GetWidenedVector(MG->getIndex());
+
+  SDLoc dl(N);
+  SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index,
+                   Scale};
+  SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops,
+                                    MG->getMemOperand(), MG->getIndexType());
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
+  return SDValue();
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
+  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
+  SDValue DataOp = MSC->getValue();
+  SDValue Mask = MSC->getMask();
+  SDValue Index = MSC->getIndex();
+  SDValue Scale = MSC->getScale();
+
+  if (OpNo == 1) {
+    DataOp = GetWidenedVector(DataOp);
+    unsigned NumElts = DataOp.getValueType().getVectorNumElements();
+
+    // Widen index.
+    EVT IndexVT = Index.getValueType();
+    EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+                                       IndexVT.getVectorElementType(), NumElts);
+    Index = ModifyToType(Index, WideIndexVT);
+
+    // The mask should be widened as well.
+    EVT MaskVT = Mask.getValueType();
+    EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+                                      MaskVT.getVectorElementType(), NumElts);
+    Mask = ModifyToType(Mask, WideMaskVT, true);
+  } else if (OpNo == 4) {
+    // Just widen the index. It's allowed to have extra elements.
+    Index = GetWidenedVector(Index);
+  } else
+    llvm_unreachable("Can't widen this operand of mscatter");
+
+  SDValue Ops[] = {MSC->getChain(), DataOp, Mask, MSC->getBasePtr(), Index,
+                   Scale};
+  return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
+                              MSC->getMemoryVT(), SDLoc(N), Ops,
+                              MSC->getMemOperand(), MSC->getIndexType());
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
+  SDValue InOp0 = GetWidenedVector(N->getOperand(0));
+  SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+  SDLoc dl(N);
+  EVT VT = N->getValueType(0);
+
+  // WARNING: In this code we widen the compare instruction with garbage.
+  // This garbage may contain denormal floats which may be slow. Is this a real
+  // concern? Should we zero the unused lanes if this is a float compare?
+
+  // Get a new SETCC node to compare the newly widened operands.
+  // Only some of the compared elements are legal.
+  EVT SVT = getSetCCResultType(InOp0.getValueType());
+  // The result type is legal; if it's vXi1, keep vXi1 for the new SETCC.
+  if (VT.getScalarType() == MVT::i1)
+    SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+                           SVT.getVectorNumElements());
+
+  SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N),
+                                  SVT, InOp0, InOp1, N->getOperand(2));
+
+  // Extract the needed results from the result vector.
+  EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
+                               SVT.getVectorElementType(),
+                               VT.getVectorNumElements());
+  SDValue CC = DAG.getNode(
+      ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
+      DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+  EVT OpVT = N->getOperand(0).getValueType();
+  ISD::NodeType ExtendCode =
+      TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+  return DAG.getNode(ExtendCode, dl, VT, CC);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
+  SDLoc dl(N);
+  SDValue Op = GetWidenedVector(N->getOperand(0));
+  EVT OrigVT = N->getOperand(0).getValueType();
+  EVT WideVT = Op.getValueType();
+  EVT ElemVT = OrigVT.getVectorElementType();
+
+  SDValue NeutralElem;
+  switch (N->getOpcode()) {
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+  case ISD::VECREDUCE_UMAX:
+    NeutralElem = DAG.getConstant(0, dl, ElemVT);
+    break;
+  case ISD::VECREDUCE_MUL:
+    NeutralElem = DAG.getConstant(1, dl, ElemVT);
+    break;
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_UMIN:
+    NeutralElem = DAG.getAllOnesConstant(dl, ElemVT);
+    break;
+  case ISD::VECREDUCE_SMAX:
+    NeutralElem = DAG.getConstant(
+        APInt::getSignedMinValue(ElemVT.getSizeInBits()), dl, ElemVT);
+    break;
+  case ISD::VECREDUCE_SMIN:
+    NeutralElem = DAG.getConstant(
+        APInt::getSignedMaxValue(ElemVT.getSizeInBits()), dl, ElemVT);
+    break;
+  case ISD::VECREDUCE_FADD:
+    NeutralElem = DAG.getConstantFP(0.0, dl, ElemVT);
+    break;
+  case ISD::VECREDUCE_FMUL:
+    NeutralElem = DAG.getConstantFP(1.0, dl, ElemVT);
+    break;
+  case ISD::VECREDUCE_FMAX:
+    // The neutral element for a max reduction must compare below every
+    // possible input, i.e. -infinity.
+    NeutralElem = DAG.getConstantFP(
+        -std::numeric_limits<double>::infinity(), dl, ElemVT);
+    break;
+  case ISD::VECREDUCE_FMIN:
+    // Likewise, the neutral element for a min reduction is +infinity.
+    NeutralElem = DAG.getConstantFP(
+        std::numeric_limits<double>::infinity(), dl, ElemVT);
+    break;
+  }
+
+  // Pad the vector with the neutral element.
+  unsigned OrigElts = OrigVT.getVectorNumElements();
+  unsigned WideElts = WideVT.getVectorNumElements();
+  for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
+    Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
+                     DAG.getConstant(Idx, dl,
+                                     TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+  return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Op,
+                     N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
+  // This only gets called in the case that the left and right inputs and
+  // result are of a legal odd vector type, and the condition is illegal i1 of
+  // the same odd width that needs widening.
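For example, a select on legal v3f32 values whose v3i1 condition gets widened to v4i1: the extra condition lane is garbage, but it only selects between the padding lanes of the widened operands, and the final EXTRACT_SUBVECTOR discards that lane anyway. A scalar sketch of this, with illustrative types in place of DAG nodes:

#include <vector>

// Scalar model of the widened VSELECT: select in the wide space, then keep
// only the low NumResultElts lanes; garbage in the padding lanes of the
// condition can only affect lanes that are dropped.
template <typename T>
std::vector<T> widenedSelect(const std::vector<bool> &WideCond,
                             const std::vector<T> &WideL,
                             const std::vector<T> &WideR,
                             unsigned NumResultElts) {
  std::vector<T> Out(NumResultElts);
  for (unsigned i = 0; i != NumResultElts; ++i)
    Out[i] = WideCond[i] ? WideL[i] : WideR[i];
  return Out;
}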
+  EVT VT = N->getValueType(0);
+  assert(VT.isVector() && !VT.isPow2VectorType() && isTypeLegal(VT));
+
+  SDValue Cond = GetWidenedVector(N->getOperand(0));
+  SDValue LeftIn = DAG.WidenVector(N->getOperand(1), SDLoc(N));
+  SDValue RightIn = DAG.WidenVector(N->getOperand(2), SDLoc(N));
+  SDLoc DL(N);
+
+  SDValue Select = DAG.getNode(N->getOpcode(), DL, LeftIn.getValueType(), Cond,
+                               LeftIn, RightIn);
+  return DAG.getNode(
+      ISD::EXTRACT_SUBVECTOR, DL, VT, Select,
+      DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+}
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+// Utility function to find the type to chop up a widened vector for
+// load/store.
+// TLI:     Target lowering used to determine legal types.
+// Width:   Number of bits left to load/store.
+// WidenVT: The widened vector type to load to/store from.
+// Align:   If 0, don't allow use of a wider type.
+// WidenEx: If Align is not 0, the additional amount we are allowed to
+//          load/store.
+static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
+                       unsigned Width, EVT WidenVT,
+                       unsigned Align = 0, unsigned WidenEx = 0) {
+  EVT WidenEltVT = WidenVT.getVectorElementType();
+  unsigned WidenWidth = WidenVT.getSizeInBits();
+  unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
+  unsigned AlignInBits = Align*8;
+
+  // If we have one element to load/store, return it.
+  EVT RetVT = WidenEltVT;
+  if (Width == WidenEltWidth)
+    return RetVT;
+
+  // See if there is a larger legal integer type than the element type to
+  // load/store.
+  unsigned VT;
+  for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
+       VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
+    EVT MemVT((MVT::SimpleValueType) VT);
+    unsigned MemVTWidth = MemVT.getSizeInBits();
+    if (MemVT.getSizeInBits() <= WidenEltWidth)
+      break;
+    auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
+    if ((Action == TargetLowering::TypeLegal ||
+         Action == TargetLowering::TypePromoteInteger) &&
+        (WidenWidth % MemVTWidth) == 0 &&
+        isPowerOf2_32(WidenWidth / MemVTWidth) &&
+        (MemVTWidth <= Width ||
+         (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+      if (MemVTWidth == WidenWidth)
+        return MemVT;
+      RetVT = MemVT;
+      break;
+    }
+  }
+
+  // See if there is a larger legal vector type to load/store that has the
+  // same vector element type and whose width evenly divides the widened
+  // vector's width.
+  for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+       VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
+    EVT MemVT = (MVT::SimpleValueType) VT;
+    unsigned MemVTWidth = MemVT.getSizeInBits();
+    auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
+    if ((Action == TargetLowering::TypeLegal ||
+         Action == TargetLowering::TypePromoteInteger) &&
+        WidenEltVT == MemVT.getVectorElementType() &&
+        (WidenWidth % MemVTWidth) == 0 &&
+        isPowerOf2_32(WidenWidth / MemVTWidth) &&
+        (MemVTWidth <= Width ||
+         (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+      if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+        return MemVT;
+    }
+  }
+
+  return RetVT;
+}
+
+// Builds a vector from scalar loads.
+// VecTy: Resulting vector type.
+// LdOps: Load operators to build from.
+// [Start,End): The range of loads to use.
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, + SmallVectorImpl<SDValue> &LdOps, + unsigned Start, unsigned End) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc dl(LdOps[Start]); + EVT LdTy = LdOps[Start].getValueType(); + unsigned Width = VecTy.getSizeInBits(); + unsigned NumElts = Width / LdTy.getSizeInBits(); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts); + + unsigned Idx = 1; + SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]); + + for (unsigned i = Start + 1; i != End; ++i) { + EVT NewLdTy = LdOps[i].getValueType(); + if (NewLdTy != LdTy) { + NumElts = Width / NewLdTy.getSizeInBits(); + NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts); + VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp); + // Readjust position and vector position based on new load type. + Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); + LdTy = NewLdTy; + } + VecOp = DAG.getNode( + ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], + DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); +} + +SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, + LoadSDNode *LD) { + // The strategy assumes that we can efficiently load power-of-two widths. + // The routine chops the vector into the largest vector loads with the same + // element type or scalar loads and then recombines it to the widen vector + // type. + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); + unsigned WidenWidth = WidenVT.getSizeInBits(); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector()); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + unsigned Align = LD->getAlignment(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + int LdWidth = LdVT.getSizeInBits(); + int WidthDiff = WidenWidth - LdWidth; + unsigned LdAlign = (!LD->isSimple()) ? 0 : Align; // Allow wider loads. + + // Find the vector type that can load from. + EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); + int NewVTWidth = NewVT.getSizeInBits(); + SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), + Align, MMOFlags, AAInfo); + LdChain.push_back(LdOp.getValue(1)); + + // Check if we can load the element with one instruction. + if (LdWidth <= NewVTWidth) { + if (!NewVT.isVector()) { + unsigned NumElts = WidenWidth / NewVTWidth; + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); + SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } + if (NewVT == WidenVT) + return LdOp; + + assert(WidenWidth % NewVTWidth == 0); + unsigned NumConcat = WidenWidth / NewVTWidth; + SmallVector<SDValue, 16> ConcatOps(NumConcat); + SDValue UndefVal = DAG.getUNDEF(NewVT); + ConcatOps[0] = LdOp; + for (unsigned i = 1; i != NumConcat; ++i) + ConcatOps[i] = UndefVal; + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); + } + + // Load vector by using multiple loads from largest vector to scalar. 
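The comment above describes the chopping strategy that FindMemType serves. As a worked example, widening a 96-bit v3i32 load toward v4i32 on a target whose legal load widths are 128, 64 and 32 bits yields one 64-bit and one 32-bit load. A toy model of the greedy choice follows; it assumes the legal widths are sorted largest-first and the remaining width is always a multiple of the smallest one, so the loop terminates.

#include <cstdio>
#include <vector>

// Greedily cover `Width` bits with the largest legal piece that still fits,
// mirroring how FindMemType is re-queried with the remaining width.
std::vector<unsigned> chop(unsigned Width,
                           const std::vector<unsigned> &LegalBits) {
  std::vector<unsigned> Pieces;
  while (Width > 0)
    for (unsigned Bits : LegalBits)
      if (Bits <= Width) {
        Pieces.push_back(Bits);
        Width -= Bits;
        break;
      }
  return Pieces;
}

int main() {
  // 96-bit v3i32 widened to v4i32: expect one 64-bit and one 32-bit load.
  for (unsigned P : chop(96, {128, 64, 32}))
    std::printf("%u-bit load\n", P);
}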
+ SmallVector<SDValue, 16> LdOps; + LdOps.push_back(LdOp); + + LdWidth -= NewVTWidth; + unsigned Offset = 0; + + while (LdWidth > 0) { + unsigned Increment = NewVTWidth / 8; + Offset += Increment; + BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); + + SDValue L; + if (LdWidth < NewVTWidth) { + // The current type we are using is too large. Find a better size. + NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); + NewVTWidth = NewVT.getSizeInBits(); + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), + MinAlign(Align, Increment), MMOFlags, AAInfo); + LdChain.push_back(L.getValue(1)); + if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) { + // Later code assumes the vector loads produced will be mergeable, so we + // must pad the final entry up to the previous width. Scalars are + // combined separately. + SmallVector<SDValue, 16> Loads; + Loads.push_back(L); + unsigned size = L->getValueSizeInBits(0); + while (size < LdOp->getValueSizeInBits(0)) { + Loads.push_back(DAG.getUNDEF(L->getValueType(0))); + size += L->getValueSizeInBits(0); + } + L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads); + } + } else { + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), + MinAlign(Align, Increment), MMOFlags, AAInfo); + LdChain.push_back(L.getValue(1)); + } + + LdOps.push_back(L); + LdOp = L; + + LdWidth -= NewVTWidth; + } + + // Build the vector from the load operations. + unsigned End = LdOps.size(); + if (!LdOps[0].getValueType().isVector()) + // All the loads are scalar loads. + return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); + + // If the load contains vectors, build the vector using concat vector. + // All of the vectors used to load are power-of-2, and the scalar loads can be + // combined to make a power-of-2 vector. + SmallVector<SDValue, 16> ConcatOps(End); + int i = End - 1; + int Idx = End; + EVT LdTy = LdOps[i].getValueType(); + // First, combine the scalar loads to a vector. + if (!LdTy.isVector()) { + for (--i; i >= 0; --i) { + LdTy = LdOps[i].getValueType(); + if (LdTy.isVector()) + break; + } + ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End); + } + ConcatOps[--Idx] = LdOps[i]; + for (--i; i >= 0; --i) { + EVT NewLdTy = LdOps[i].getValueType(); + if (NewLdTy != LdTy) { + // Create a larger vector. + ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, + makeArrayRef(&ConcatOps[Idx], End - Idx)); + Idx = End - 1; + LdTy = NewLdTy; + } + ConcatOps[--Idx] = LdOps[i]; + } + + if (WidenWidth == LdTy.getSizeInBits() * (End - Idx)) + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, + makeArrayRef(&ConcatOps[Idx], End - Idx)); + + // We need to fill the rest with undefs to build the vector. + unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); + SmallVector<SDValue, 16> WidenOps(NumOps); + SDValue UndefVal = DAG.getUNDEF(LdTy); + { + unsigned i = 0; + for (; i != End-Idx; ++i) + WidenOps[i] = ConcatOps[Idx+i]; + for (; i != NumOps; ++i) + WidenOps[i] = UndefVal; + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps); +} + +SDValue +DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, + LoadSDNode *LD, + ISD::LoadExtType ExtType) { + // For extension loads, it may not be more efficient to chop up the vector + // and then extend it. Instead, we unroll the load and build a new vector. 
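A scalar model of the element-by-element plan just described, using a sign-extending i8-to-i32 load as the example; zero-initialized tail lanes stand in for the undef padding, and the function is a sketch rather than an LLVM API.

#include <cstdint>
#include <vector>

// Scalar model of GenWidenVectorExtLoads: load each narrow element, extend
// it to the wide element type, then pad out to the widened element count.
std::vector<int32_t> extLoadUnrolled(const int8_t *Mem, unsigned NumElts,
                                     unsigned WidenNumElts) {
  std::vector<int32_t> Ops(WidenNumElts, 0); // tail lanes stand in for undef
  for (unsigned i = 0; i != NumElts; ++i)
    Ops[i] = Mem[i]; // sign-extending "load" of one element
  return Ops;
}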
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + unsigned Align = LD->getAlignment(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + EVT EltVT = WidenVT.getVectorElementType(); + EVT LdEltVT = LdVT.getVectorElementType(); + unsigned NumElts = LdVT.getVectorNumElements(); + + // Load each element and widen. + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(WidenNumElts); + unsigned Increment = LdEltVT.getSizeInBits() / 8; + Ops[0] = + DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(), + LdEltVT, Align, MMOFlags, AAInfo); + LdChain.push_back(Ops[0].getValue(1)); + unsigned i = 0, Offset = Increment; + for (i=1; i < NumElts; ++i, Offset += Increment) { + SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); + Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, + LD->getPointerInfo().getWithOffset(Offset), LdEltVT, + Align, MMOFlags, AAInfo); + LdChain.push_back(Ops[i].getValue(1)); + } + + // Fill the rest with undefs. + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i != WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getBuildVector(WidenVT, dl, Ops); +} + +void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, + StoreSDNode *ST) { + // The strategy assumes that we can efficiently store power-of-two widths. + // The routine chops the vector into the largest vector stores with the same + // element type or scalar stores. + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + unsigned Align = ST->getAlignment(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); + AAMDNodes AAInfo = ST->getAAInfo(); + SDValue ValOp = GetWidenedVector(ST->getValue()); + SDLoc dl(ST); + + EVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + EVT ValVT = ValOp.getValueType(); + unsigned ValWidth = ValVT.getSizeInBits(); + EVT ValEltVT = ValVT.getVectorElementType(); + unsigned ValEltWidth = ValEltVT.getSizeInBits(); + assert(StVT.getVectorElementType() == ValEltVT); + + int Idx = 0; // current index to store + unsigned Offset = 0; // offset from base to store + while (StWidth != 0) { + // Find the largest vector type we can store with. + EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT); + unsigned NewVTWidth = NewVT.getSizeInBits(); + unsigned Increment = NewVTWidth / 8; + if (NewVT.isVector()) { + unsigned NumVTElts = NewVT.getVectorNumElements(); + do { + SDValue EOp = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + StChain.push_back(DAG.getStore( + Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), + MinAlign(Align, Offset), MMOFlags, AAInfo)); + StWidth -= NewVTWidth; + Offset += Increment; + Idx += NumVTElts; + + BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); + } while (StWidth != 0 && StWidth >= NewVTWidth); + } else { + // Cast the vector to the scalar type we can store. + unsigned NumElts = ValWidth / NewVTWidth; + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); + SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp); + // Readjust index position based on new vector type. 
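The rescaling on the next line converts an index counted in value elements into an index counted in store chunks, and the mirror-image line after the loop converts it back. For instance, element 4 of an i32 vector is chunk 4 * 32 / 64 == 2 when the same bits are stored as i64 pieces. In isolation:

// Convert an index over FromBits-sized elements into an index over
// ToBits-sized elements covering the same bytes (a sketch of the two
// rescaling lines around the scalar-store loop).
unsigned rescaleIndex(unsigned Idx, unsigned FromBits, unsigned ToBits) {
  return Idx * FromBits / ToBits;
}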
+      Idx = Idx * ValEltWidth / NewVTWidth;
+      do {
+        SDValue EOp = DAG.getNode(
+            ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+            DAG.getConstant(Idx++, dl,
+                            TLI.getVectorIdxTy(DAG.getDataLayout())));
+        StChain.push_back(DAG.getStore(
+            Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
+            MinAlign(Align, Offset), MMOFlags, AAInfo));
+        StWidth -= NewVTWidth;
+        Offset += Increment;
+        BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
+      } while (StWidth != 0 && StWidth >= NewVTWidth);
+      // Restore index back to be relative to the original widen element type.
+      Idx = Idx * NewVTWidth / ValEltWidth;
+    }
+  }
+}
+
+void
+DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
+                                            StoreSDNode *ST) {
+  // For truncating stores, it may not be more efficient to truncate the
+  // vector and then store it. Instead, we extract each element and then
+  // store it.
+  SDValue Chain = ST->getChain();
+  SDValue BasePtr = ST->getBasePtr();
+  unsigned Align = ST->getAlignment();
+  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+  AAMDNodes AAInfo = ST->getAAInfo();
+  SDValue ValOp = GetWidenedVector(ST->getValue());
+  SDLoc dl(ST);
+
+  EVT StVT = ST->getMemoryVT();
+  EVT ValVT = ValOp.getValueType();
+
+  // It must be true that the wide vector type is bigger than the memory type
+  // we need to store.
+  assert(StVT.isVector() && ValOp.getValueType().isVector());
+  assert(StVT.bitsLT(ValOp.getValueType()));
+
+  // For truncating stores, we cannot play the trick of chopping into legal
+  // vector types and bitcasting to the right type. Instead, we unroll the
+  // store.
+  EVT StEltVT = StVT.getVectorElementType();
+  EVT ValEltVT = ValVT.getVectorElementType();
+  unsigned Increment = ValEltVT.getSizeInBits() / 8;
+  unsigned NumElts = StVT.getVectorNumElements();
+  SDValue EOp = DAG.getNode(
+      ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+      DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+  StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
+                                      ST->getPointerInfo(), StEltVT, Align,
+                                      MMOFlags, AAInfo));
+  unsigned Offset = Increment;
+  for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+    SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
+    SDValue EOp = DAG.getNode(
+        ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+        DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+    StChain.push_back(DAG.getTruncStore(
+        Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset),
+        StEltVT, MinAlign(Align, Offset), MMOFlags, AAInfo));
+  }
+}
+
+/// Modifies a vector input (widens or narrows) to a vector of NVT. The
+/// input vector must have the same element type as NVT.
+/// FillWithZeroes specifies that the vector should be widened with zeroes.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
+                                       bool FillWithZeroes) {
+  // Note that InOp might have been widened, so it might already have
+  // the right width or it might need to be narrowed.
+  EVT InVT = InOp.getValueType();
+  assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+         "input and widen element type must match");
+  SDLoc dl(InOp);
+
+  // Check if InOp already has the right width.
+  if (InVT == NVT)
+    return InOp;
+
+  unsigned InNumElts = InVT.getVectorNumElements();
+  unsigned WidenNumElts = NVT.getVectorNumElements();
+  if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
+    unsigned NumConcat = WidenNumElts / InNumElts;
+    SmallVector<SDValue, 16> Ops(NumConcat);
+    SDValue FillVal = FillWithZeroes ?
DAG.getConstant(0, dl, InVT) : + DAG.getUNDEF(InVT); + Ops[0] = InOp; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = FillVal; + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops); + } + + if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + // Fall back to extract and build. + SmallVector<SDValue, 16> Ops(WidenNumElts); + EVT EltVT = NVT.getVectorElementType(); + unsigned MinNumElts = std::min(WidenNumElts, InNumElts); + unsigned Idx; + for (Idx = 0; Idx < MinNumElts; ++Idx) + Ops[Idx] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : + DAG.getUNDEF(EltVT); + for ( ; Idx < WidenNumElts; ++Idx) + Ops[Idx] = FillVal; + return DAG.getBuildVector(NVT, dl, Ops); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp new file mode 100644 index 0000000000000..34660e3a48ec5 --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -0,0 +1,625 @@ +//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the ResourcePriorityQueue class, which is a +// SchedulingPriorityQueue that prioritizes instructions using DFA state to +// reduce the length of the critical path through the basic block +// on VLIW platforms. +// The scheduler is basically a top-down adaptable list scheduler with DFA +// resource tracking added to the cost function. +// DFA is queried as a state machine to model "packets/bundles" during +// schedule. Currently packets/bundles are discarded at the end of +// scheduling, affecting only order of instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +#define DEBUG_TYPE "scheduler" + +static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Disable use of DFA during scheduling")); + +static cl::opt<int> RegPressureThreshold( + "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), + cl::desc("Track reg pressure and switch priority to in-depth")); + +ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) + : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) { + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + TRI = STI.getRegisterInfo(); + TLI = IS->TLI; + TII = STI.getInstrInfo(); + ResourcesModel.reset(TII->CreateTargetScheduleState(STI)); + // This hard requirement could be relaxed, but for now + // do not let it proceed. 
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (const TargetRegisterClass *RC : TRI->regclasses()) + RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF); + + ParallelLiveRanges = 0; + HorizontalVerticalBalance = 0; +} + +unsigned +ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { + unsigned NumberDeps = 0; + for (SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) + continue; + + SUnit *PredSU = Pred.getSUnit(); + const SDNode *ScegN = PredSU->getNode(); + + if (!ScegN) + continue; + + // If value is passed to CopyToReg, it is probably + // live outside BB. + switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: NumberDeps++; break; + case ISD::CopyToReg: break; + case ISD::INLINEASM: break; + case ISD::INLINEASM_BR: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { + MVT VT = ScegN->getSimpleValueType(i); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, + unsigned RCId) { + unsigned NumberDeps = 0; + for (const SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) + continue; + + SUnit *SuccSU = Succ.getSUnit(); + const SDNode *ScegN = SuccSU->getNode(); + if (!ScegN) + continue; + + // If value is passed to CopyToReg, it is probably + // live outside BB. + switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: break; + case ISD::CopyToReg: NumberDeps++; break; + case ISD::INLINEASM: break; + case ISD::INLINEASM_BR: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { + const SDValue &Op = ScegN->getOperand(i); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +static unsigned numberCtrlDepsInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (const SDep &Succ : SU->Succs) + if (Succ.isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +static unsigned numberCtrlPredInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SDep &Pred : SU->Preds) + if (Pred.isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +/// +/// Initialize nodes. +/// +void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + NumNodesSolelyBlocking.resize(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + SUnit *SU = &(*SUnits)[i]; + initNumRegDefsLeft(SU); + SU->NodeQueueId = 0; + } +} + +/// This heuristic is used if DFA scheduling is not desired +/// for some VLIW platform. +bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { + // The isScheduleHigh flag allows nodes with wraparound dependencies that + // cannot easily be modeled as edges with latencies to be scheduled as + // soon as possible in a top-down schedule. 
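The comparator below is a chain of tie-breakers. A minimal standalone analogue, using a hypothetical POD in place of SUnit; as in the real predicate, returning true means the left unit has the lower priority, so the queue's scan keeps the right one.

#include <tuple>

// Toy analogue of the resource_sort tie-breaker chain: schedule-high flag,
// then critical-path latency, then how many nodes a unit solely blocks,
// then the node number for a stable final ordering.
struct ToyUnit {
  bool ScheduleHigh;
  unsigned Latency;  // critical-path height
  unsigned Blocked;  // nodes this unit solely blocks
  unsigned NodeNum;  // stable final tie-breaker
};

bool lessPriority(const ToyUnit &L, const ToyUnit &R) {
  if (L.ScheduleHigh != R.ScheduleHigh)
    return R.ScheduleHigh; // schedule-high units win
  return std::tie(L.Latency, L.Blocked, L.NodeNum) <
         std::tie(R.Latency, R.Blocked, R.NodeNum);
}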
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh) + return false; + + if (!LHS->isScheduleHigh && RHS->isScheduleHigh) + return true; + + unsigned LHSNum = LHS->NodeNum; + unsigned RHSNum = RHS->NodeNum; + + // The most important heuristic is scheduling the critical path. + unsigned LHSLatency = PQ->getLatency(LHSNum); + unsigned RHSLatency = PQ->getLatency(RHSNum); + if (LHSLatency < RHSLatency) return true; + if (LHSLatency > RHSLatency) return false; + + // After that, if two nodes have identical latencies, look to see if one will + // unblock more other nodes than the other. + unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); + unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); + if (LHSBlocked < RHSBlocked) return true; + if (LHSBlocked > RHSBlocked) return false; + + // Finally, just to provide a stable ordering, use the node number as a + // deciding factor. + return LHSNum < RHSNum; +} + + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = nullptr; + for (const SDep &Pred : SU->Preds) { + SUnit &PredSU = *Pred.getSUnit(); + if (!PredSU.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &PredSU) + return nullptr; + OnlyAvailablePred = &PredSU; + } + } + return OnlyAvailablePred; +} + +void ResourcePriorityQueue::push(SUnit *SU) { + // Look at all of the successors of this node. Count the number of nodes that + // this node is the sole unscheduled node for. + unsigned NumNodesBlocking = 0; + for (const SDep &Succ : SU->Succs) + if (getSingleUnscheduledPred(Succ.getSUnit()) == SU) + ++NumNodesBlocking; + + NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; + Queue.push_back(SU); +} + +/// Check if scheduling of this SU is possible +/// in the current packet. +bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { + if (!SU || !SU->getNode()) + return false; + + // If this is a compound instruction, + // it is likely to be a call. Do not delay it. + if (SU->getNode()->getGluedNode()) + return true; + + // First see if the pipeline could receive this instruction + // in the current cycle. + if (SU->getNode()->isMachineOpcode()) + switch (SU->getNode()->getMachineOpcode()) { + default: + if (!ResourcesModel->canReserveResources(&TII->get( + SU->getNode()->getMachineOpcode()))) + return false; + break; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + break; + } + + // Now see if there are no other dependencies + // to instructions already in the packet. + for (unsigned i = 0, e = Packet.size(); i != e; ++i) + for (const SDep &Succ : Packet[i]->Succs) { + // Since we do not add pseudos to packets, might as well + // ignore order deps. + if (Succ.isCtrl()) + continue; + + if (Succ.getSUnit() == SU) + return false; + } + + return true; +} + +/// Keep track of available resources. +void ResourcePriorityQueue::reserveResources(SUnit *SU) { + // If this SU does not fit in the packet + // start a new one. 
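The packet bookkeeping that isResourceAvailable() feeds into can be reduced to a few lines. Below is a toy packetizer in which a plain width check stands in for the DFA query; the real model also rejects instructions on functional-unit conflicts, not just on issue width.

#include <vector>

// Toy model of reserveResources(): append to the current packet until the
// "DFA" rejects an instruction or the issue width is reached, then flush
// the packet and begin a new cycle.
struct ToyPacketizer {
  unsigned IssueWidth;
  std::vector<int> Packet; // opcodes bundled in the current cycle

  bool fitsInPacket(int /*Op*/) const { return Packet.size() < IssueWidth; }

  void reserve(int Op) {
    if (!fitsInPacket(Op))
      Packet.clear(); // start a new packet (next cycle)
    Packet.push_back(Op);
    if (Packet.size() >= IssueWidth)
      Packet.clear(); // packet full: reset for the next cycle
  }
};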
+ if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) { + ResourcesModel->clearResources(); + Packet.clear(); + } + + if (SU->getNode() && SU->getNode()->isMachineOpcode()) { + switch (SU->getNode()->getMachineOpcode()) { + default: + ResourcesModel->reserveResources(&TII->get( + SU->getNode()->getMachineOpcode())); + break; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + break; + } + Packet.push_back(SU); + } + // Forcefully end packet for PseudoOps. + else { + ResourcesModel->clearResources(); + Packet.clear(); + } + + // If packet is now full, reset the state so in the next cycle + // we start fresh. + if (Packet.size() >= InstrItins->SchedModel.IssueWidth) { + ResourcesModel->clearResources(); + Packet.clear(); + } +} + +int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { + int RegBalance = 0; + + if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) + return RegBalance; + + // Gen estimate. + for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) { + MVT VT = SU->getNode()->getSimpleValueType(i); + if (TLI->isTypeLegal(VT) + && TLI->getRegClassFor(VT) + && TLI->getRegClassFor(VT)->getID() == RCId) + RegBalance += numberRCValSuccInSU(SU, RCId); + } + // Kill estimate. + for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) { + const SDValue &Op = SU->getNode()->getOperand(i); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); + if (isa<ConstantSDNode>(Op.getNode())) + continue; + + if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT) + && TLI->getRegClassFor(VT)->getID() == RCId) + RegBalance -= numberRCValPredInSU(SU, RCId); + } + return RegBalance; +} + +/// Estimates change in reg pressure from this SU. +/// It is achieved by trivial tracking of defined +/// and used vregs in dependent instructions. +/// The RawPressure flag makes this function to ignore +/// existing reg file sizes, and report raw def/use +/// balance. +int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { + int RegBalance = 0; + + if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) + return RegBalance; + + if (RawPressure) { + for (const TargetRegisterClass *RC : TRI->regclasses()) + RegBalance += rawRegPressureDelta(SU, RC->getID()); + } + else { + for (const TargetRegisterClass *RC : TRI->regclasses()) { + if ((RegPressure[RC->getID()] + + rawRegPressureDelta(SU, RC->getID()) > 0) && + (RegPressure[RC->getID()] + + rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()])) + RegBalance += rawRegPressureDelta(SU, RC->getID()); + } + } + + return RegBalance; +} + +// Constants used to denote relative importance of +// heuristic components for cost computation. +static const unsigned PriorityOne = 200; +static const unsigned PriorityTwo = 50; +static const unsigned PriorityThree = 15; +static const unsigned PriorityFour = 5; +static const unsigned ScaleOne = 20; +static const unsigned ScaleTwo = 10; +static const unsigned ScaleThree = 5; +static const unsigned FactorOne = 2; + +/// Returns single number reflecting benefit of scheduling SU +/// in the current cycle. +int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { + // Initial trivial priority. + int ResCount = 1; + + // Do not waste time on a node that is already scheduled. + if (SU->isScheduled) + return ResCount; + + // Forced priority is high. 
+  if (SU->isScheduleHigh)
+    ResCount += PriorityOne;
+
+  // Adaptable scheduling: a small but very parallel region, where reg
+  // pressure is an issue.
+  if (HorizontalVerticalBalance > RegPressureThreshold) {
+    // Critical path first.
+    ResCount += (SU->getHeight() * ScaleTwo);
+    // If resources are available for it, multiply the
+    // chance of scheduling.
+    if (isResourceAvailable(SU))
+      ResCount <<= FactorOne;
+
+    // Consider change to reg pressure from scheduling
+    // this SU.
+    ResCount -= (regPressureDelta(SU,true) * ScaleOne);
+  }
+  // Default heuristic: greedy and critical-path driven.
+  else {
+    // Critical path first.
+    ResCount += (SU->getHeight() * ScaleTwo);
+    // Now see how many instructions are blocked by this SU.
+    ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
+    // If resources are available for it, multiply the
+    // chance of scheduling.
+    if (isResourceAvailable(SU))
+      ResCount <<= FactorOne;
+
+    ResCount -= (regPressureDelta(SU) * ScaleTwo);
+  }
+
+  // These are platform-specific things that will need to move into the back
+  // end and be accessed from here via a hook.
+  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+    if (N->isMachineOpcode()) {
+      const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+      if (TID.isCall())
+        ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
+    }
+    else
+      switch (N->getOpcode()) {
+      default:  break;
+      case ISD::TokenFactor:
+      case ISD::CopyFromReg:
+      case ISD::CopyToReg:
+        ResCount += PriorityFour;
+        break;
+
+      case ISD::INLINEASM:
+      case ISD::INLINEASM_BR:
+        ResCount += PriorityThree;
+        break;
+      }
+  }
+  return ResCount;
+}
+
+
+/// Main resource tracking point.
+void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
+  // Use a null entry as an event marker to reset the DFA state.
+  if (!SU) {
+    ResourcesModel->clearResources();
+    Packet.clear();
+    return;
+  }
+
+  const SDNode *ScegN = SU->getNode();
+  // Update reg pressure tracking.
+  // First update current node.
+  if (ScegN->isMachineOpcode()) {
+    // Estimate generated regs.
+    for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+      MVT VT = ScegN->getSimpleValueType(i);
+
+      if (TLI->isTypeLegal(VT)) {
+        const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+        if (RC)
+          RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
+      }
+    }
+    // Estimate killed regs.
+    for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+      const SDValue &Op = ScegN->getOperand(i);
+      MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+
+      if (TLI->isTypeLegal(VT)) {
+        const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+        if (RC) {
+          if (RegPressure[RC->getID()] >
+              (numberRCValPredInSU(SU, RC->getID())))
+            RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
+          else RegPressure[RC->getID()] = 0;
+        }
+      }
+    }
+    for (SDep &Pred : SU->Preds) {
+      if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0))
+        continue;
+      --Pred.getSUnit()->NumRegDefsLeft;
+    }
+  }
+
+  // Reserve resources for this SU.
+  reserveResources(SU);
+
+  // Adjust the number of parallel live ranges. The heuristic is simple: a
+  // node with no data successors reduces the number of live ranges; all
+  // others increase it.
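In isolation, that bookkeeping looks like this; the parameter names are illustrative and mirror the SUnit fields used below.

#include <algorithm>

// Toy model of the live-range balance: a scheduled node with no data
// successors only ends live ranges (its operands' last uses), while a node
// whose defs are still to be consumed opens new ones.
void updateParallelLiveRanges(unsigned &ParallelLiveRanges,
                              unsigned NumDataSuccs, unsigned NumPreds,
                              unsigned NumRegDefsLeft) {
  if (NumDataSuccs == 0)
    ParallelLiveRanges -= std::min(ParallelLiveRanges, NumPreds);
  else
    ParallelLiveRanges += NumRegDefsLeft;
}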
+ unsigned NumberNonControlDeps = 0; + + for (const SDep &Succ : SU->Succs) { + adjustPriorityOfUnscheduledPreds(Succ.getSUnit()); + if (!Succ.isCtrl()) + NumberNonControlDeps++; + } + + if (!NumberNonControlDeps) { + if (ParallelLiveRanges >= SU->NumPreds) + ParallelLiveRanges -= SU->NumPreds; + else + ParallelLiveRanges = 0; + + } + else + ParallelLiveRanges += SU->NumRegDefsLeft; + + // Track parallel live chains. + HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU)); + HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU)); +} + +void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) { + unsigned NodeNumDefs = 0; + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + if (N->isMachineOpcode()) { + const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); + // No register need be allocated for this. + if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { + NodeNumDefs = 0; + break; + } + NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs()); + } + else + switch(N->getOpcode()) { + default: break; + case ISD::CopyFromReg: + NodeNumDefs++; + break; + case ISD::INLINEASM: + case ISD::INLINEASM_BR: + NodeNumDefs++; + break; + } + + SU->NumRegDefsLeft = NodeNumDefs; +} + +/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just +/// scheduled. If SU is not itself available, then there is at least one +/// predecessor node that has not been scheduled yet. If SU has exactly ONE +/// unscheduled predecessor, we want to increase its priority: it getting +/// scheduled will make this node available, so it is better than some other +/// node of the same priority that will not make a node available. +void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { + if (SU->isAvailable) return; // All preds scheduled. + + SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); + if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable) + return; + + // Okay, we found a single predecessor that is available, but not scheduled. + // Since it is available, it must be in the priority queue. First remove it. + remove(OnlyAvailablePred); + + // Reinsert the node into the priority queue, which recomputes its + // NumNodesSolelyBlocking value. + push(OnlyAvailablePred); +} + + +/// Main access point - returns next instructions +/// to be placed in scheduling sequence. +SUnit *ResourcePriorityQueue::pop() { + if (empty()) + return nullptr; + + std::vector<SUnit *>::iterator Best = Queue.begin(); + if (!DisableDFASched) { + int BestCost = SUSchedulingCost(*Best); + for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) { + + if (SUSchedulingCost(*I) > BestCost) { + BestCost = SUSchedulingCost(*I); + Best = I; + } + } + } + // Use default TD scheduling mechanism. 
+ else { + for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + } + + SUnit *V = *Best; + if (Best != std::prev(Queue.end())) + std::swap(*Best, Queue.back()); + + Queue.pop_back(); + + return V; +} + + +void ResourcePriorityQueue::remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + std::vector<SUnit *>::iterator I = find(Queue, SU); + if (I != std::prev(Queue.end())) + std::swap(*I, Queue.back()); + + Queue.pop_back(); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h new file mode 100644 index 0000000000000..65b9d017fc5cd --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -0,0 +1,166 @@ +//===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the SDDbgValue class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H + +#include "llvm/IR/DebugLoc.h" +#include "llvm/Support/DataTypes.h" +#include <utility> + +namespace llvm { + +class DIVariable; +class DIExpression; +class SDNode; +class Value; +class raw_ostream; + +/// Holds the information from a dbg_value node through SDISel. +/// We do not use SDValue here to avoid including its header. +class SDDbgValue { +public: + enum DbgValueKind { + SDNODE = 0, ///< Value is the result of an expression. + CONST = 1, ///< Value is a constant. + FRAMEIX = 2, ///< Value is contents of a stack location. + VREG = 3 ///< Value is a virtual register. + }; +private: + union { + struct { + SDNode *Node; ///< Valid for expressions. + unsigned ResNo; ///< Valid for expressions. + } s; + const Value *Const; ///< Valid for constants. + unsigned FrameIx; ///< Valid for stack objects. + unsigned VReg; ///< Valid for registers. + } u; + DIVariable *Var; + DIExpression *Expr; + DebugLoc DL; + unsigned Order; + enum DbgValueKind kind; + bool IsIndirect; + bool Invalid = false; + bool Emitted = false; + +public: + /// Constructor for non-constants. + SDDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N, unsigned R, + bool indir, DebugLoc dl, unsigned O) + : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(indir) { + kind = SDNODE; + u.s.Node = N; + u.s.ResNo = R; + } + + /// Constructor for constants. + SDDbgValue(DIVariable *Var, DIExpression *Expr, const Value *C, DebugLoc dl, + unsigned O) + : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) { + kind = CONST; + u.Const = C; + } + + /// Constructor for virtual registers and frame indices. + SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned VRegOrFrameIdx, + bool IsIndirect, DebugLoc DL, unsigned Order, + enum DbgValueKind Kind) + : Var(Var), Expr(Expr), DL(DL), Order(Order), IsIndirect(IsIndirect) { + assert((Kind == VREG || Kind == FRAMEIX) && + "Invalid SDDbgValue constructor"); + kind = Kind; + if (kind == VREG) + u.VReg = VRegOrFrameIdx; + else + u.FrameIx = VRegOrFrameIdx; + } + + /// Returns the kind. + DbgValueKind getKind() const { return kind; } + + /// Returns the DIVariable pointer for the variable. 
+ DIVariable *getVariable() const { return Var; } + + /// Returns the DIExpression pointer for the expression. + DIExpression *getExpression() const { return Expr; } + + /// Returns the SDNode* for a register ref + SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; } + + /// Returns the ResNo for a register ref + unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; } + + /// Returns the Value* for a constant + const Value *getConst() const { assert (kind==CONST); return u.Const; } + + /// Returns the FrameIx for a stack object + unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; } + + /// Returns the Virtual Register for a VReg + unsigned getVReg() const { assert (kind==VREG); return u.VReg; } + + /// Returns whether this is an indirect value. + bool isIndirect() const { return IsIndirect; } + + /// Returns the DebugLoc. + DebugLoc getDebugLoc() const { return DL; } + + /// Returns the SDNodeOrder. This is the order of the preceding node in the + /// input. + unsigned getOrder() const { return Order; } + + /// setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated" + /// property. A SDDbgValue is invalid if the SDNode that produces the value is + /// deleted. + void setIsInvalidated() { Invalid = true; } + bool isInvalidated() const { return Invalid; } + + /// setIsEmitted / isEmitted - Getter/Setter for flag indicating that this + /// SDDbgValue has been emitted to an MBB. + void setIsEmitted() { Emitted = true; } + bool isEmitted() const { return Emitted; } + + /// clearIsEmitted - Reset Emitted flag, for certain special cases where + /// dbg.addr is emitted twice. + void clearIsEmitted() { Emitted = false; } + + LLVM_DUMP_METHOD void dump() const; + LLVM_DUMP_METHOD void print(raw_ostream &OS) const; +}; + +/// Holds the information from a dbg_label node through SDISel. +/// We do not use SDValue here to avoid including its header. +class SDDbgLabel { + MDNode *Label; + DebugLoc DL; + unsigned Order; + +public: + SDDbgLabel(MDNode *Label, DebugLoc dl, unsigned O) + : Label(Label), DL(std::move(dl)), Order(O) {} + + /// Returns the MDNode pointer for the label. + MDNode *getLabel() const { return Label; } + + /// Returns the DebugLoc. + DebugLoc getDebugLoc() const { return DL; } + + /// Returns the SDNodeOrder. This is the order of the preceding node in the + /// input. + unsigned getOrder() const { return Order; } +}; + +} // end llvm namespace + +#endif diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp new file mode 100644 index 0000000000000..7ee44c808fcbf --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -0,0 +1,804 @@ +//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This implements a fast scheduler. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstrEmitter.h"
+#include "ScheduleDAGSDNodes.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+STATISTIC(NumUnfolds,    "Number of nodes unfolded");
+STATISTIC(NumDups,       "Number of duplicated nodes");
+STATISTIC(NumPRCopies,   "Number of physical register copies");
+
+static RegisterScheduler
+  fastDAGScheduler("fast", "Fast suboptimal list scheduling",
+                   createFastDAGScheduler);
+static RegisterScheduler
+  linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling",
+                        createDAGLinearizer);
+
+
+namespace {
+  /// FastPriorityQueue - A degenerate priority queue that considers
+  /// all nodes to have the same priority.
+  ///
+  struct FastPriorityQueue {
+    SmallVector<SUnit *, 16> Queue;
+
+    bool empty() const { return Queue.empty(); }
+
+    void push(SUnit *U) {
+      Queue.push_back(U);
+    }
+
+    SUnit *pop() {
+      if (empty()) return nullptr;
+      SUnit *V = Queue.back();
+      Queue.pop_back();
+      return V;
+    }
+  };
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
+///
+class ScheduleDAGFast : public ScheduleDAGSDNodes {
+private:
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  FastPriorityQueue AvailableQueue;
+
+  /// LiveRegDefs - A set of physical registers and their definitions that
+  /// are "live". These nodes must be scheduled before any other nodes that
+  /// modify the registers can be scheduled.
+  unsigned NumLiveRegs;
+  std::vector<SUnit*> LiveRegDefs;
+  std::vector<unsigned> LiveRegCycles;
+
+public:
+  ScheduleDAGFast(MachineFunction &mf)
+    : ScheduleDAGSDNodes(mf) {}
+
+  void Schedule() override;
+
+  /// AddPred - adds a predecessor edge to SUnit SU.
+  void AddPred(SUnit *SU, const SDep &D) {
+    SU->addPred(D);
+  }
+
+  /// RemovePred - removes a predecessor edge from SUnit SU.
+  void RemovePred(SUnit *SU, const SDep &D) {
+    SU->removePred(D);
+  }
+
+private:
+  void ReleasePred(SUnit *SU, SDep *PredEdge);
+  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+  void ScheduleNodeBottomUp(SUnit*, unsigned);
+  SUnit *CopyAndMoveSuccessors(SUnit*);
+  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+                                const TargetRegisterClass*,
+                                const TargetRegisterClass*,
+                                SmallVectorImpl<SUnit*>&);
+  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
+  void ListScheduleBottomUp();
+
+  /// forceUnitLatencies - The fast scheduler doesn't care about real
+  /// latencies.
+  bool forceUnitLatencies() const override { return true; }
+};
+}  // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGFast::Schedule() {
+  LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+  NumLiveRegs = 0;
+  LiveRegDefs.resize(TRI->getNumRegs(), nullptr);
+  LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+  // Build the scheduling graph.
+ BuildSchedGraph(nullptr); + + LLVM_DEBUG(dump()); + + // Execute the actual scheduling loop. + ListScheduleBottomUp(); +} + +//===----------------------------------------------------------------------===// +// Bottom-Up Scheduling +//===----------------------------------------------------------------------===// + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + dumpNode(*PredSU); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(nullptr); + } +#endif + --PredSU->NumSuccsLeft; + + // If all the node's successors are scheduled, this node is ready + // to be scheduled. Ignore the special EntrySU node. + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { + PredSU->isAvailable = true; + AvailableQueue.push(PredSU); + } +} + +void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { + // Bottom up: release predecessors + for (SDep &Pred : SU->Preds) { + ReleasePred(SU, &Pred); + if (Pred.isAssignedRegDep()) { + // This is a physical register dependency and it's impossible or + // expensive to copy the register. Make sure nothing that can + // clobber the register is scheduled between the predecessor and + // this node. + if (!LiveRegDefs[Pred.getReg()]) { + ++NumLiveRegs; + LiveRegDefs[Pred.getReg()] = Pred.getSUnit(); + LiveRegCycles[Pred.getReg()] = CurCycle; + } + } + } +} + +/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending +/// count of its predecessors. If a predecessor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { + LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + LLVM_DEBUG(dumpNode(*SU)); + + assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); + SU->setHeightToAtLeast(CurCycle); + Sequence.push_back(SU); + + ReleasePredecessors(SU, CurCycle); + + // Release all the implicit physical register defs that are live. + for (SDep &Succ : SU->Succs) { + if (Succ.isAssignedRegDep()) { + if (LiveRegCycles[Succ.getReg()] == Succ.getSUnit()->getHeight()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[Succ.getReg()] == SU && + "Physical register dependency violated?"); + --NumLiveRegs; + LiveRegDefs[Succ.getReg()] = nullptr; + LiveRegCycles[Succ.getReg()] = 0; + } + } + } + + SU->isScheduled = true; +} + +/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled +/// successors to the newly created node. 
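+/// This is used to break live physical register interferences: recomputing
+/// the value for a second consumer can be cheaper than copying the register.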
+SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
+  SDNode *N = SU->getNode();
+  if (!N)
+    return nullptr;
+
+  // Nodes with incoming glue cannot be duplicated safely.
+  if (N->getGluedNode())
+    return nullptr;
+
+  SUnit *NewSU;
+  bool TryUnfold = false;
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+    MVT VT = N->getSimpleValueType(i);
+    if (VT == MVT::Glue)
+      return nullptr;
+    else if (VT == MVT::Other)
+      TryUnfold = true;
+  }
+  for (const SDValue &Op : N->op_values()) {
+    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+    if (VT == MVT::Glue)
+      return nullptr;
+  }
+
+  if (TryUnfold) {
+    SmallVector<SDNode*, 2> NewNodes;
+    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+      return nullptr;
+
+    LLVM_DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+    assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+    N = NewNodes[1];
+    SDNode *LoadNode = NewNodes[0];
+    unsigned NumVals = N->getNumValues();
+    unsigned OldNumVals = SU->getNode()->getNumValues();
+    for (unsigned i = 0; i != NumVals; ++i)
+      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+                                   SDValue(LoadNode, 1));
+
+    SUnit *NewSU = newSUnit(N);
+    assert(N->getNodeId() == -1 && "Node already inserted!");
+    N->setNodeId(NewSU->NodeNum);
+
+    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+        NewSU->isTwoAddress = true;
+        break;
+      }
+    }
+    if (MCID.isCommutable())
+      NewSU->isCommutable = true;
+
+    // LoadNode may already exist. This can happen when there is another
+    // load from the same location that produces the same type of value
+    // but with different alignment or volatility.
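+    // If so, reuse the existing SUnit rather than creating a second one.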
+ bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = newSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + } + + SDep ChainPred; + SmallVector<SDep, 4> ChainSuccs; + SmallVector<SDep, 4> LoadPreds; + SmallVector<SDep, 4> NodePreds; + SmallVector<SDep, 4> NodeSuccs; + for (SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) + ChainPred = Pred; + else if (Pred.getSUnit()->getNode() && + Pred.getSUnit()->getNode()->isOperandOf(LoadNode)) + LoadPreds.push_back(Pred); + else + NodePreds.push_back(Pred); + } + for (SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) + ChainSuccs.push_back(Succ); + else + NodeSuccs.push_back(Succ); + } + + if (ChainPred.getSUnit()) { + RemovePred(SU, ChainPred); + if (isNewLoad) + AddPred(LoadSU, ChainPred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) { + AddPred(LoadSU, Pred); + } + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + if (isNewLoad) { + SDep D(LoadSU, SDep::Barrier); + D.setLatency(LoadSU->Latency); + AddPred(NewSU, D); + } + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + LLVM_DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); + NewSU = Clone(SU); + + // New SUnit has the exact same predecessors. + for (SDep &Pred : SU->Preds) + if (!Pred.isArtificial()) + AddPred(NewSU, Pred); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SDep &Succ : SU->Succs) { + if (Succ.isArtificial()) + continue; + SUnit *SuccSU = Succ.getSUnit(); + if (SuccSU->isScheduled) { + SDep D = Succ; + D.setSUnit(NewSU); + AddPred(SuccSU, D); + D.setSUnit(SU); + DelDeps.push_back(std::make_pair(SuccSU, D)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + ++NumDups; + return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVectorImpl<SUnit*> &Copies) { + SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(nullptr)); + CopyFromSU->CopySrcRC = SrcRC; + CopyFromSU->CopyDstRC = DestRC; + + SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(nullptr)); + CopyToSU->CopySrcRC = DestRC; + CopyToSU->CopyDstRC = SrcRC; + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. 
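+  // Successors that have not been scheduled yet keep their edges to SU itself.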
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SDep &Succ : SU->Succs) { + if (Succ.isArtificial()) + continue; + SUnit *SuccSU = Succ.getSUnit(); + if (SuccSU->isScheduled) { + SDep D = Succ; + D.setSUnit(CopyToSU); + AddPred(SuccSU, D); + DelDeps.push_back(std::make_pair(SuccSU, Succ)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { + RemovePred(DelDeps[i].first, DelDeps[i].second); + } + SDep FromDep(SU, SDep::Data, Reg); + FromDep.setLatency(SU->Latency); + AddPred(CopyFromSU, FromDep); + SDep ToDep(CopyFromSU, SDep::Data, 0); + ToDep.setLatency(CopyFromSU->Latency); + AddPred(CopyToSU, ToDep); + + Copies.push_back(CopyFromSU); + Copies.push_back(CopyToSU); + + ++NumPRCopies; +} + +/// getPhysicalRegisterVT - Returns the ValueType of the physical register +/// definition of the specified node. +/// FIXME: Move to SelectionDAG? +static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, + const TargetInstrInfo *TII) { + unsigned NumRes; + if (N->getOpcode() == ISD::CopyFromReg) { + // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type. + NumRes = 1; + } else { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + NumRes = MCID.getNumDefs(); + for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } + } + return N->getSimpleValueType(NumRes); +} + +/// CheckForLiveRegDef - Return true and update live register vector if the +/// specified register def of the specified SUnit clobbers any "live" registers. +static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, + std::vector<SUnit*> &LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVectorImpl<unsigned> &LRegs, + const TargetRegisterInfo *TRI) { + bool Added = false; + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) { + if (RegAdded.insert(*AI).second) { + LRegs.push_back(*AI); + Added = true; + } + } + } + return Added; +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. +/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. +bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, + SmallVectorImpl<unsigned> &LRegs){ + if (NumLiveRegs == 0) + return false; + + SmallSet<unsigned, 4> RegAdded; + // If this node would clobber any "live" register, then it's not ready. + for (SDep &Pred : SU->Preds) { + if (Pred.isAssignedRegDep()) { + CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs, + RegAdded, LRegs, TRI); + } + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { + if (Node->getOpcode() == ISD::INLINEASM || + Node->getOpcode() == ISD::INLINEASM_BR) { + // Inline asm can clobber physical defs. + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + ++i; // Skip the ID value. 
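+        // For register operand groups, the next NumVals operands are the
+        // registers themselves.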
+        if (InlineAsm::isRegDefKind(Flags) ||
+            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+            InlineAsm::isClobberKind(Flags)) {
+          // Check for def of register or earlyclobber register.
+          for (; NumVals; --NumVals, ++i) {
+            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+            if (Register::isPhysicalRegister(Reg))
+              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+          }
+        } else
+          i += NumVals;
+      }
+      continue;
+    }
+    if (!Node->isMachineOpcode())
+      continue;
+    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+    if (!MCID.ImplicitDefs)
+      continue;
+    for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
+      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+    }
+  }
+  return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGFast::ListScheduleBottomUp() {
+  unsigned CurCycle = 0;
+
+  // Release any predecessors of the special Exit node.
+  ReleasePredecessors(&ExitSU, CurCycle);
+
+  // Add root to Available queue.
+  if (!SUnits.empty()) {
+    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+    RootSU->isAvailable = true;
+    AvailableQueue.push(RootSU);
+  }
+
+  // While Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready put it back. Schedule the node.
+  SmallVector<SUnit*, 4> NotReady;
+  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+  Sequence.reserve(SUnits.size());
+  while (!AvailableQueue.empty()) {
+    bool Delayed = false;
+    LRegsMap.clear();
+    SUnit *CurSU = AvailableQueue.pop();
+    while (CurSU) {
+      SmallVector<unsigned, 4> LRegs;
+      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+        break;
+      Delayed = true;
+      LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
+      NotReady.push_back(CurSU);
+      CurSU = AvailableQueue.pop();
+    }
+
+    // All candidates are delayed due to live physical reg dependencies.
+    // Try code duplication or inserting cross class copies
+    // to resolve it.
+    if (Delayed && !CurSU) {
+      if (!CurSU) {
+        // Try duplicating the nodes that produce these
+        // "expensive to copy" values to break the dependency. In case even
+        // that doesn't work, insert cross class copies.
+        SUnit *TrySU = NotReady[0];
+        SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
+        assert(LRegs.size() == 1 && "Can't handle this yet!");
+        unsigned Reg = LRegs[0];
+        SUnit *LRDef = LiveRegDefs[Reg];
+        MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+        const TargetRegisterClass *RC =
+          TRI->getMinimalPhysRegClass(Reg, VT);
+        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If the cross copy register class is the same as RC, then it must be
+        // possible to copy the value directly. Do not try to duplicate the
+        // def.
+        // If the cross copy register class is not the same as RC, then it is
+        // possible to copy the value but it requires cross register class
+        // copies and it is expensive.
+        // If the cross copy register class is null, then it is not possible
+        // to copy the value at all.
+        SUnit *NewDef = nullptr;
+        if (DestRC != RC) {
+          NewDef = CopyAndMoveSuccessors(LRDef);
+          if (!DestRC && !NewDef)
+            report_fatal_error("Can't handle live physical "
+                               "register dependency!");
+        }
+        if (!NewDef) {
+          // Issue copies, these can be expensive cross register class copies.
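+          // The last copy in the pair becomes the new definition of the live
+          // register (see the LiveRegDefs update below).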
+          SmallVector<SUnit*, 2> Copies;
+          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+          LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
+                            << " to SU #" << Copies.front()->NodeNum << "\n");
+          AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
+          NewDef = Copies.back();
+        }
+
+        LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
+                          << " to SU #" << TrySU->NodeNum << "\n");
+        LiveRegDefs[Reg] = NewDef;
+        AddPred(NewDef, SDep(TrySU, SDep::Artificial));
+        TrySU->isAvailable = false;
+        CurSU = NewDef;
+      }
+
+      if (!CurSU) {
+        llvm_unreachable("Unable to resolve live physical register dependencies!");
+      }
+    }
+
+    // Add the nodes that aren't ready back onto the available list.
+    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+      NotReady[i]->isPending = false;
+      // May no longer be available due to backtracking.
+      if (NotReady[i]->isAvailable)
+        AvailableQueue.push(NotReady[i]);
+    }
+    NotReady.clear();
+
+    if (CurSU)
+      ScheduleNodeBottomUp(CurSU, CurCycle);
+    ++CurCycle;
+  }
+
+  // Reverse the order since it is bottom up.
+  std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+  VerifyScheduledSequence(/*isBottomUp=*/true);
+#endif
+}
+
+
+namespace {
+//===----------------------------------------------------------------------===//
+// ScheduleDAGLinearize - No real scheduling; it simply linearizes the DAG
+// in topological order.
+// IMPORTANT: this may not work for targets with physreg dependencies.
+//
+class ScheduleDAGLinearize : public ScheduleDAGSDNodes {
+public:
+  ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {}
+
+  void Schedule() override;
+
+  MachineBasicBlock *
+    EmitSchedule(MachineBasicBlock::iterator &InsertPos) override;
+
+private:
+  std::vector<SDNode*> Sequence;
+  DenseMap<SDNode*, SDNode*> GluedMap;  // Cache glue to its user.
+
+  void ScheduleNode(SDNode *N);
+};
+} // end anonymous namespace
+
+void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
+  if (N->getNodeId() != 0)
+    llvm_unreachable(nullptr);
+
+  if (!N->isMachineOpcode() &&
+      (N->getOpcode() == ISD::EntryToken || isPassiveNode(N)))
+    // These nodes do not need to be translated into MIs.
+    return;
+
+  LLVM_DEBUG(dbgs() << "\n*** Scheduling: ");
+  LLVM_DEBUG(N->dump(DAG));
+  Sequence.push_back(N);
+
+  unsigned NumOps = N->getNumOperands();
+  if (unsigned NumLeft = NumOps) {
+    SDNode *GluedOpN = nullptr;
+    do {
+      const SDValue &Op = N->getOperand(NumLeft-1);
+      SDNode *OpN = Op.getNode();
+
+      if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) {
+        // Schedule glue operand right above N.
+        GluedOpN = OpN;
+        assert(OpN->getNodeId() != 0 && "Glue operand not ready?");
+        OpN->setNodeId(0);
+        ScheduleNode(OpN);
+        continue;
+      }
+
+      if (OpN == GluedOpN)
+        // Glue operand is already scheduled.
+        continue;
+
+      DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN);
+      if (DI != GluedMap.end() && DI->second != N)
+        // Users of glues are counted against the glued users.
+        OpN = DI->second;
+
+      unsigned Degree = OpN->getNodeId();
+      assert(Degree > 0 && "Predecessor over-released!");
+      OpN->setNodeId(--Degree);
+      if (Degree == 0)
+        ScheduleNode(OpN);
+    } while (--NumLeft);
+  }
+}
+
+/// findGluedUser - Find the representative use of a glue value by walking
+/// the use chain.
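+/// Glued nodes form a chain through getGluedUser(); the representative user
+/// is the last node in that chain.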
+static SDNode *findGluedUser(SDNode *N) { + while (SDNode *Glued = N->getGluedUser()) + N = Glued; + return N; +} + +void ScheduleDAGLinearize::Schedule() { + LLVM_DEBUG(dbgs() << "********** DAG Linearization **********\n"); + + SmallVector<SDNode*, 8> Glues; + unsigned DAGSize = 0; + for (SDNode &Node : DAG->allnodes()) { + SDNode *N = &Node; + + // Use node id to record degree. + unsigned Degree = N->use_size(); + N->setNodeId(Degree); + unsigned NumVals = N->getNumValues(); + if (NumVals && N->getValueType(NumVals-1) == MVT::Glue && + N->hasAnyUseOfValue(NumVals-1)) { + SDNode *User = findGluedUser(N); + if (User) { + Glues.push_back(N); + GluedMap.insert(std::make_pair(N, User)); + } + } + + if (N->isMachineOpcode() || + (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N))) + ++DAGSize; + } + + for (unsigned i = 0, e = Glues.size(); i != e; ++i) { + SDNode *Glue = Glues[i]; + SDNode *GUser = GluedMap[Glue]; + unsigned Degree = Glue->getNodeId(); + unsigned UDegree = GUser->getNodeId(); + + // Glue user must be scheduled together with the glue operand. So other + // users of the glue operand must be treated as its users. + SDNode *ImmGUser = Glue->getGluedUser(); + for (const SDNode *U : Glue->uses()) + if (U == ImmGUser) + --Degree; + GUser->setNodeId(UDegree + Degree); + Glue->setNodeId(1); + } + + Sequence.reserve(DAGSize); + ScheduleNode(DAG->getRoot().getNode()); +} + +MachineBasicBlock* +ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { + InstrEmitter Emitter(BB, InsertPos); + DenseMap<SDValue, unsigned> VRBaseMap; + + LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; }); + + unsigned NumNodes = Sequence.size(); + MachineBasicBlock *BB = Emitter.getBlock(); + for (unsigned i = 0; i != NumNodes; ++i) { + SDNode *N = Sequence[NumNodes-i-1]; + LLVM_DEBUG(N->dump(DAG)); + Emitter.EmitNode(N, false, false, VRBaseMap); + + // Emit any debug values associated with the node. + if (N->getHasDebugValue()) { + MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); + for (auto DV : DAG->GetDbgValues(N)) { + if (!DV->isEmitted()) + if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap)) + BB->insert(InsertPos, DbgMI); + } + } + } + + LLVM_DEBUG(dbgs() << '\n'); + + InsertPos = Emitter.getInsertPos(); + return Emitter.getBlock(); +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGFast(*IS->MF); +} + +llvm::ScheduleDAGSDNodes * +llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGLinearize(*IS->MF); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp new file mode 100644 index 0000000000000..ff806bdb822c2 --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -0,0 +1,3188 @@ +//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This implements bottom-up and top-down register pressure reduction list +// schedulers, using standard algorithms. The basic approach uses a priority +// queue of available nodes to schedule. One at a time, nodes are taken from +// the priority queue (thus in priority order), checked for legality to +// schedule, and emitted if legal. +// +//===----------------------------------------------------------------------===// + +#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <iterator> +#include <limits> +#include <memory> +#include <utility> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "pre-RA-sched" + +STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); +STATISTIC(NumUnfolds, "Number of nodes unfolded"); +STATISTIC(NumDups, "Number of duplicated nodes"); +STATISTIC(NumPRCopies, "Number of physical register copies"); + +static RegisterScheduler + burrListDAGScheduler("list-burr", + "Bottom-up register reduction list scheduling", + createBURRListDAGScheduler); + +static RegisterScheduler + sourceListDAGScheduler("source", + "Similar to list-burr but schedules in source " + "order when possible", + createSourceListDAGScheduler); + +static RegisterScheduler + hybridListDAGScheduler("list-hybrid", + "Bottom-up register pressure aware list scheduling " + "which tries to balance latency and register pressure", + createHybridListDAGScheduler); + +static RegisterScheduler + ILPListDAGScheduler("list-ilp", + "Bottom-up register pressure aware list scheduling " + "which tries to balance ILP and register pressure", + createILPListDAGScheduler); + +static cl::opt<bool> DisableSchedCycles( + "disable-sched-cycles", cl::Hidden, cl::init(false), + cl::desc("Disable cycle-level precision during preRA scheduling")); + +// Temporary sched=list-ilp flags until the heuristics are robust. +// Some options are also available under sched=list-hybrid. 
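+// For example, one of these flags can be combined with the scheduler choice
+// on the llc command line (illustrative invocation; any .ll input works):
+//   llc -pre-RA-sched=list-ilp -disable-sched-reg-pressure foo.ll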
+static cl::opt<bool> DisableSchedRegPressure(
+    "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
+    cl::desc("Disable regpressure priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedLiveUses(
+    "disable-sched-live-uses", cl::Hidden, cl::init(true),
+    cl::desc("Disable live use priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedVRegCycle(
+    "disable-sched-vrcycle", cl::Hidden, cl::init(false),
+    cl::desc("Disable virtual register cycle interference checks"));
+static cl::opt<bool> DisableSchedPhysRegJoin(
+    "disable-sched-physreg-join", cl::Hidden, cl::init(false),
+    cl::desc("Disable physreg def-use affinity"));
+static cl::opt<bool> DisableSchedStalls(
+    "disable-sched-stalls", cl::Hidden, cl::init(true),
+    cl::desc("Disable no-stall priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedCriticalPath(
+    "disable-sched-critical-path", cl::Hidden, cl::init(false),
+    cl::desc("Disable critical path priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedHeight(
+    "disable-sched-height", cl::Hidden, cl::init(false),
+    cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+static cl::opt<bool> Disable2AddrHack(
+    "disable-2addr-hack", cl::Hidden, cl::init(true),
+    cl::desc("Disable scheduler's two-address hack"));
+
+static cl::opt<int> MaxReorderWindow(
+    "max-sched-reorder", cl::Hidden, cl::init(6),
+    cl::desc("Number of instructions to allow ahead of the critical path "
+             "in sched=list-ilp"));
+
+static cl::opt<unsigned> AvgIPC(
+    "sched-avg-ipc", cl::Hidden, cl::init(1),
+    cl::desc("Average inst/cycle when no target itinerary exists."));
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation. This supports both top-down and bottom-up scheduling.
+///
+class ScheduleDAGRRList : public ScheduleDAGSDNodes {
+private:
+  /// NeedLatency - True if the scheduler will make use of latency information.
+  bool NeedLatency;
+
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  SchedulingPriorityQueue *AvailableQueue;
+
+  /// PendingQueue - This contains all of the instructions whose operands have
+  /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation). Once the operands become available, the instruction is
+  /// added to the AvailableQueue.
+  std::vector<SUnit *> PendingQueue;
+
+  /// HazardRec - The hazard recognizer to use.
+  ScheduleHazardRecognizer *HazardRec;
+
+  /// CurCycle - The current scheduler state corresponds to this cycle.
+  unsigned CurCycle = 0;
+
+  /// MinAvailableCycle - Cycle of the soonest available instruction.
+  unsigned MinAvailableCycle;
+
+  /// IssueCount - Count of instructions issued in this cycle.
+  /// Currently valid only for bottom-up scheduling.
+  unsigned IssueCount;
+
+  /// LiveRegDefs - A set of physical registers and their definitions that are
+  /// "live". These nodes must be scheduled before any other node that
+  /// modifies the registers can be scheduled.
+  unsigned NumLiveRegs;
+  std::unique_ptr<SUnit*[]> LiveRegDefs;
+  std::unique_ptr<SUnit*[]> LiveRegGens;
+
+  // Collect interferences between physical register use/defs.
+  // Each interference is an SUnit and set of physical registers.
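+  // The interfering register sets themselves are stored in LRegsMap below,
+  // keyed by the interfering SUnit.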
+  SmallVector<SUnit*, 4> Interferences;
+
+  using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
+
+  LRegsMapT LRegsMap;
+
+  /// Topo - A topological ordering for SUnits which permits fast IsReachable
+  /// and similar queries.
+  ScheduleDAGTopologicalSort Topo;
+
+  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
+  // DAG crawling.
+  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
+
+public:
+  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
+                    SchedulingPriorityQueue *availqueue,
+                    CodeGenOpt::Level OptLevel)
+      : ScheduleDAGSDNodes(mf),
+        NeedLatency(needlatency), AvailableQueue(availqueue),
+        Topo(SUnits, nullptr) {
+    const TargetSubtargetInfo &STI = mf.getSubtarget();
+    if (DisableSchedCycles || !NeedLatency)
+      HazardRec = new ScheduleHazardRecognizer();
+    else
+      HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
+  }
+
+  ~ScheduleDAGRRList() override {
+    delete HazardRec;
+    delete AvailableQueue;
+  }
+
+  void Schedule() override;
+
+  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
+
+  /// IsReachable - Checks if SU is reachable from TargetSU.
+  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
+    return Topo.IsReachable(SU, TargetSU);
+  }
+
+  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+  /// create a cycle.
+  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+    return Topo.WillCreateCycle(SU, TargetSU);
+  }
+
+  /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU.
+  /// Does *NOT* update the topological ordering! It just queues an update.
+  void AddPredQueued(SUnit *SU, const SDep &D) {
+    Topo.AddPredQueued(SU, D.getSUnit());
+    SU->addPred(D);
+  }
+
+  /// AddPred - Adds a predecessor edge to SUnit SU.
+  /// Updates the topological ordering if required.
+  void AddPred(SUnit *SU, const SDep &D) {
+    Topo.AddPred(SU, D.getSUnit());
+    SU->addPred(D);
+  }
+
+  /// RemovePred - Removes a predecessor edge from SUnit SU.
+  /// Updates the topological ordering if required.
+  void RemovePred(SUnit *SU, const SDep &D) {
+    Topo.RemovePred(SU, D.getSUnit());
+    SU->removePred(D);
+  }
+
+private:
+  bool isReady(SUnit *SU) {
+    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
+           AvailableQueue->isReady(SU);
+  }
+
+  void ReleasePred(SUnit *SU, const SDep *PredEdge);
+  void ReleasePredecessors(SUnit *SU);
+  void ReleasePending();
+  void AdvanceToCycle(unsigned NextCycle);
+  void AdvancePastStalls(SUnit *SU);
+  void EmitNode(SUnit *SU);
+  void ScheduleNodeBottomUp(SUnit*);
+  void CapturePred(SDep *PredEdge);
+  void UnscheduleNodeBottomUp(SUnit*);
+  void RestoreHazardCheckerBottomUp();
+  void BacktrackBottomUp(SUnit*, SUnit*);
+  SUnit *TryUnfoldSU(SUnit *);
+  SUnit *CopyAndMoveSuccessors(SUnit*);
+  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+                                const TargetRegisterClass*,
+                                const TargetRegisterClass*,
+                                SmallVectorImpl<SUnit*>&);
+  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
+
+  void releaseInterferences(unsigned Reg = 0);
+
+  SUnit *PickNodeToScheduleBottomUp();
+  void ListScheduleBottomUp();
+
+  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
+  SUnit *CreateNewSUnit(SDNode *N) {
+    unsigned NumSUnits = SUnits.size();
+    SUnit *NewNode = newSUnit(N);
+    // Update the topological ordering.
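+    // A NodeNum at or beyond the old SUnits size means the array grew, so the
+    // cached ordering no longer covers the new node.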
+ if (NewNode->NodeNum >= NumSUnits) + Topo.MarkDirty(); + return NewNode; + } + + /// CreateClone - Creates a new SUnit from an existing one. + SUnit *CreateClone(SUnit *N) { + unsigned NumSUnits = SUnits.size(); + SUnit *NewNode = Clone(N); + // Update the topological ordering. + if (NewNode->NodeNum >= NumSUnits) + Topo.MarkDirty(); + return NewNode; + } + + /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't + /// need actual latency information but the hybrid scheduler does. + bool forceUnitLatencies() const override { + return !NeedLatency; + } +}; + +} // end anonymous namespace + +/// GetCostForDef - Looks up the register class and cost for a given definition. +/// Typically this just means looking up the representative register class, +/// but for untyped values (MVT::Untyped) it means inspecting the node's +/// opcode to determine what register class is being generated. +static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, + const TargetLowering *TLI, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + unsigned &RegClass, unsigned &Cost, + const MachineFunction &MF) { + MVT VT = RegDefPos.GetValue(); + + // Special handling for untyped values. These values can only come from + // the expansion of custom DAG-to-DAG patterns. + if (VT == MVT::Untyped) { + const SDNode *Node = RegDefPos.GetNode(); + + // Special handling for CopyFromReg of untyped values. + if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); + RegClass = RC->getID(); + Cost = 1; + return; + } + + unsigned Opcode = Node->getMachineOpcode(); + if (Opcode == TargetOpcode::REG_SEQUENCE) { + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); + RegClass = RC->getID(); + Cost = 1; + return; + } + + unsigned Idx = RegDefPos.GetIdx(); + const MCInstrDesc Desc = TII->get(Opcode); + const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF); + RegClass = RC->getID(); + // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a + // better way to determine it. + Cost = 1; + } else { + RegClass = TLI->getRepRegClassFor(VT)->getID(); + Cost = TLI->getRepRegClassCostFor(VT); + } +} + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGRRList::Schedule() { + LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) + << " '" << BB->getName() << "' **********\n"); + + CurCycle = 0; + IssueCount = 0; + MinAvailableCycle = + DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max(); + NumLiveRegs = 0; + // Allocate slots for each physical register, plus one for a special register + // to track the virtual resource of a calling sequence. + LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]()); + LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]()); + CallSeqEndForStart.clear(); + assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); + + // Build the scheduling graph. + BuildSchedGraph(nullptr); + + LLVM_DEBUG(dump()); + Topo.MarkDirty(); + + AvailableQueue->initNodes(SUnits); + + HazardRec->Reset(); + + // Execute the actual scheduling loop. 
+  ListScheduleBottomUp();
+
+  AvailableQueue->releaseState();
+
+  LLVM_DEBUG({
+    dbgs() << "*** Final schedule ***\n";
+    dumpSchedule();
+    dbgs() << '\n';
+  });
+}
+
+//===----------------------------------------------------------------------===//
+//  Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
+  SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+  if (PredSU->NumSuccsLeft == 0) {
+    dbgs() << "*** Scheduling failed! ***\n";
+    dumpNode(*PredSU);
+    dbgs() << " has been released too many times!\n";
+    llvm_unreachable(nullptr);
+  }
+#endif
+  --PredSU->NumSuccsLeft;
+
+  if (!forceUnitLatencies()) {
+    // Updating predecessor's height. This is now the cycle when the
+    // predecessor can be scheduled without causing a pipeline stall.
+    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
+  }
+
+  // If all the node's successors are scheduled, this node is ready
+  // to be scheduled. Ignore the special EntrySU node.
+  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+    PredSU->isAvailable = true;
+
+    unsigned Height = PredSU->getHeight();
+    if (Height < MinAvailableCycle)
+      MinAvailableCycle = Height;
+
+    if (isReady(PredSU)) {
+      AvailableQueue->push(PredSU);
+    }
+    // CapturePred and others may have left the node in the pending queue;
+    // avoid adding it twice.
+    else if (!PredSU->isPending) {
+      PredSU->isPending = true;
+      PendingQueue.push_back(PredSU);
+    }
+  }
+}
+
+/// IsChainDependent - Test if Outer is reachable from Inner through
+/// chain dependencies.
+static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
+                             unsigned NestLevel,
+                             const TargetInstrInfo *TII) {
+  SDNode *N = Outer;
+  while (true) {
+    if (N == Inner)
+      return true;
+    // For a TokenFactor, examine each operand. There may be multiple ways
+    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+    // most nesting in order to ensure that we find the corresponding match.
+    if (N->getOpcode() == ISD::TokenFactor) {
+      for (const SDValue &Op : N->op_values())
+        if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
+          return true;
+      return false;
+    }
+    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+    if (N->isMachineOpcode()) {
+      if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
+        ++NestLevel;
+      } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
+        if (NestLevel == 0)
+          return false;
+        --NestLevel;
+      }
+    }
+    // Otherwise, find the chain and continue climbing.
+    for (const SDValue &Op : N->op_values())
+      if (Op.getValueType() == MVT::Other) {
+        N = Op.getNode();
+        goto found_chain_operand;
+      }
+    return false;
+  found_chain_operand:;
+    if (N->getOpcode() == ISD::EntryToken)
+      return false;
+  }
+}
+
+/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
+/// the corresponding (lowered) CALLSEQ_BEGIN node.
+///
+/// NestLevel and MaxNest are used in recursion to indicate the current level
+/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
+/// level seen so far.
+///
+/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
+/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
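+///
+/// The search walks chain operands backward from N, incrementing the nest
+/// level at each CALLSEQ_END and decrementing it at each CALLSEQ_BEGIN, so
+/// that nested call sequences are matched up correctly.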
+static SDNode *
+FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
+                 const TargetInstrInfo *TII) {
+  while (true) {
+    // For a TokenFactor, examine each operand. There may be multiple ways
+    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+    // most nesting in order to ensure that we find the corresponding match.
+    if (N->getOpcode() == ISD::TokenFactor) {
+      SDNode *Best = nullptr;
+      unsigned BestMaxNest = MaxNest;
+      for (const SDValue &Op : N->op_values()) {
+        unsigned MyNestLevel = NestLevel;
+        unsigned MyMaxNest = MaxNest;
+        if (SDNode *New = FindCallSeqStart(Op.getNode(),
+                                           MyNestLevel, MyMaxNest, TII))
+          if (!Best || (MyMaxNest > BestMaxNest)) {
+            Best = New;
+            BestMaxNest = MyMaxNest;
+          }
+      }
+      assert(Best);
+      MaxNest = BestMaxNest;
+      return Best;
+    }
+    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+    if (N->isMachineOpcode()) {
+      if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
+        ++NestLevel;
+        MaxNest = std::max(MaxNest, NestLevel);
+      } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
+        assert(NestLevel != 0);
+        --NestLevel;
+        if (NestLevel == 0)
+          return N;
+      }
+    }
+    // Otherwise, find the chain and continue climbing.
+    for (const SDValue &Op : N->op_values())
+      if (Op.getValueType() == MVT::Other) {
+        N = Op.getNode();
+        goto found_chain_operand;
+      }
+    return nullptr;
+  found_chain_operand:;
+    if (N->getOpcode() == ISD::EntryToken)
+      return nullptr;
+  }
+}
+
+/// Call ReleasePred for each predecessor, then update register live def/gen.
+/// Always update LiveRegDefs for a register dependence even if the current SU
+/// also defines the register. This effectively creates one large live range
+/// across a sequence of two-address nodes. This is important because the
+/// entire chain must be scheduled together. Example:
+///
+/// flags = (3) add
+/// flags = (2) addc flags
+/// flags = (1) addc flags
+///
+/// results in
+///
+/// LiveRegDefs[flags] = 3
+/// LiveRegGens[flags] = 1
+///
+/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
+/// interference on flags.
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
+  // Bottom up: release predecessors.
+  for (SDep &Pred : SU->Preds) {
+    ReleasePred(SU, &Pred);
+    if (Pred.isAssignedRegDep()) {
+      // This is a physical register dependency and it's impossible or
+      // expensive to copy the register. Make sure nothing that can
+      // clobber the register is scheduled between the predecessor and
+      // this node.
+      SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
+      assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
+             "interference on register dependence");
+      LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
+      if (!LiveRegGens[Pred.getReg()]) {
+        ++NumLiveRegs;
+        LiveRegGens[Pred.getReg()] = SU;
+      }
+    }
+  }
+
+  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
+  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
+  // these nodes, to prevent other calls from being interscheduled with them.
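+  // The call sequence is modeled as an extra "register" one past the last
+  // physical register; Schedule() allocates the additional LiveRegDefs slot.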
+ unsigned CallResource = TRI->getNumRegs(); + if (!LiveRegDefs[CallResource]) + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) + if (Node->isMachineOpcode() && + Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { + unsigned NestLevel = 0; + unsigned MaxNest = 0; + SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); + assert(N && "Must find call sequence start"); + + SUnit *Def = &SUnits[N->getNodeId()]; + CallSeqEndForStart[Def] = SU; + + ++NumLiveRegs; + LiveRegDefs[CallResource] = Def; + LiveRegGens[CallResource] = SU; + break; + } +} + +/// Check to see if any of the pending instructions are ready to issue. If +/// so, add them to the available queue. +void ScheduleDAGRRList::ReleasePending() { + if (DisableSchedCycles) { + assert(PendingQueue.empty() && "pending instrs not allowed in this mode"); + return; + } + + // If the available queue is empty, it is safe to reset MinAvailableCycle. + if (AvailableQueue->empty()) + MinAvailableCycle = std::numeric_limits<unsigned>::max(); + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + unsigned ReadyCycle = PendingQueue[i]->getHeight(); + if (ReadyCycle < MinAvailableCycle) + MinAvailableCycle = ReadyCycle; + + if (PendingQueue[i]->isAvailable) { + if (!isReady(PendingQueue[i])) + continue; + AvailableQueue->push(PendingQueue[i]); + } + PendingQueue[i]->isPending = false; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } +} + +/// Move the scheduler state forward by the specified number of Cycles. +void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { + if (NextCycle <= CurCycle) + return; + + IssueCount = 0; + AvailableQueue->setCurCycle(NextCycle); + if (!HazardRec->isEnabled()) { + // Bypass lots of virtual calls in case of long latency. + CurCycle = NextCycle; + } + else { + for (; CurCycle != NextCycle; ++CurCycle) { + HazardRec->RecedeCycle(); + } + } + // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the + // available Q to release pending nodes at least once before popping. + ReleasePending(); +} + +/// Move the scheduler state forward until the specified node's dependents are +/// ready and can be scheduled with no resource conflicts. +void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { + if (DisableSchedCycles) + return; + + // FIXME: Nodes such as CopyFromReg probably should not advance the current + // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node + // has predecessors the cycle will be advanced when they are scheduled. + // But given the crude nature of modeling latency though such nodes, we + // currently need to treat these nodes like real instructions. + // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; + + unsigned ReadyCycle = SU->getHeight(); + + // Bump CurCycle to account for latency. We assume the latency of other + // available instructions may be hidden by the stall (not a full pipe stall). + // This updates the hazard recognizer's cycle before reserving resources for + // this instruction. + AdvanceToCycle(ReadyCycle); + + // Calls are scheduled in their preceding cycle, so don't conflict with + // hazards from instructions after the call. EmitNode will reset the + // scoreboard state before emitting the call. 
+  if (SU->isCall)
+    return;
+
+  // FIXME: For resource conflicts in very long non-pipelined stages, we
+  // should probably skip ahead here to avoid useless scoreboard checks.
+  int Stalls = 0;
+  while (true) {
+    ScheduleHazardRecognizer::HazardType HT =
+      HazardRec->getHazardType(SU, -Stalls);
+
+    if (HT == ScheduleHazardRecognizer::NoHazard)
+      break;
+
+    ++Stalls;
+  }
+  AdvanceToCycle(CurCycle + Stalls);
+}
+
+/// Record this SUnit in the HazardRecognizer.
+/// Does not update CurCycle.
+void ScheduleDAGRRList::EmitNode(SUnit *SU) {
+  if (!HazardRec->isEnabled())
+    return;
+
+  // Check for phys reg copy.
+  if (!SU->getNode())
+    return;
+
+  switch (SU->getNode()->getOpcode()) {
+  default:
+    assert(SU->getNode()->isMachineOpcode() &&
+           "This target-independent node should not be scheduled.");
+    break;
+  case ISD::MERGE_VALUES:
+  case ISD::TokenFactor:
+  case ISD::LIFETIME_START:
+  case ISD::LIFETIME_END:
+  case ISD::CopyToReg:
+  case ISD::CopyFromReg:
+  case ISD::EH_LABEL:
+    // Noops don't affect the scoreboard state. Copies are likely to be
+    // removed.
+    return;
+  case ISD::INLINEASM:
+  case ISD::INLINEASM_BR:
+    // For inline asm, clear the pipeline state.
+    HazardRec->Reset();
+    return;
+  }
+  if (SU->isCall) {
+    // Calls are scheduled with their preceding instructions. For bottom-up
+    // scheduling, clear the pipeline state before emitting.
+    HazardRec->Reset();
+  }
+
+  HazardRec->EmitInstruction(SU);
+}
+
+static void resetVRegCycle(SUnit *SU);
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
+  LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
+  LLVM_DEBUG(dumpNode(*SU));
+
+#ifndef NDEBUG
+  if (CurCycle < SU->getHeight())
+    LLVM_DEBUG(dbgs() << "   Height [" << SU->getHeight()
+                      << "] pipeline stall!\n");
+#endif
+
+  // FIXME: Do not modify node height. It may interfere with
+  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
+  // node its ready cycle can aid heuristics, and after scheduling it can
+  // indicate the scheduled cycle.
+  SU->setHeightToAtLeast(CurCycle);
+
+  // Reserve resources for the scheduled instruction.
+  EmitNode(SU);
+
+  Sequence.push_back(SU);
+
+  AvailableQueue->scheduledNode(SU);
+
+  // If HazardRec is disabled, and each inst counts as one cycle, then
+  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
+  // PendingQueue for schedulers that implement HasReadyFilter.
+  if (!HazardRec->isEnabled() && AvgIPC < 2)
+    AdvanceToCycle(CurCycle + 1);
+
+  // Update liveness of predecessors before successors to avoid treating a
+  // two-address node as a live range def.
+  ReleasePredecessors(SU);
+
+  // Release all the implicit physical register defs that are live.
+  for (SDep &Succ : SU->Succs) {
+    // LiveRegDefs[Succ.getReg()] != SU when SU is a two-address node.
+    if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
+      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+      --NumLiveRegs;
+      LiveRegDefs[Succ.getReg()] = nullptr;
+      LiveRegGens[Succ.getReg()] = nullptr;
+      releaseInterferences(Succ.getReg());
+    }
+  }
+  // Release the special call resource dependence, if this is the beginning
+  // of a call.
+ unsigned CallResource = TRI->getNumRegs(); + if (LiveRegDefs[CallResource] == SU) + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[CallResource] = nullptr; + LiveRegGens[CallResource] = nullptr; + releaseInterferences(CallResource); + } + } + + resetVRegCycle(SU); + + SU->isScheduled = true; + + // Conditions under which the scheduler should eagerly advance the cycle: + // (1) No available instructions + // (2) All pipelines full, so available instructions must have hazards. + // + // If HazardRec is disabled, the cycle was pre-advanced before calling + // ReleasePredecessors. In that case, IssueCount should remain 0. + // + // Check AvailableQueue after ReleasePredecessors in case of zero latency. + if (HazardRec->isEnabled() || AvgIPC > 1) { + if (SU->getNode() && SU->getNode()->isMachineOpcode()) + ++IssueCount; + if ((HazardRec->isEnabled() && HazardRec->atIssueLimit()) + || (!HazardRec->isEnabled() && IssueCount == AvgIPC)) + AdvanceToCycle(CurCycle + 1); + } +} + +/// CapturePred - This does the opposite of ReleasePred. Since SU is being +/// unscheduled, increase the succ left count of its predecessors. Remove +/// them from AvailableQueue if necessary. +void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + if (PredSU->isAvailable) { + PredSU->isAvailable = false; + if (!PredSU->isPending) + AvailableQueue->remove(PredSU); + } + + assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() && + "NumSuccsLeft will overflow!"); + ++PredSU->NumSuccsLeft; +} + +/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and +/// its predecessor states to reflect the change. +void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { + LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); + LLVM_DEBUG(dumpNode(*SU)); + + for (SDep &Pred : SU->Preds) { + CapturePred(&Pred); + if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){ + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() && + "Physical register dependency violated?"); + --NumLiveRegs; + LiveRegDefs[Pred.getReg()] = nullptr; + LiveRegGens[Pred.getReg()] = nullptr; + releaseInterferences(Pred.getReg()); + } + } + + // Reclaim the special call resource dependence, if this is the beginning + // of a call. + unsigned CallResource = TRI->getNumRegs(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) { + SUnit *SeqEnd = CallSeqEndForStart[SU]; + assert(SeqEnd && "Call sequence start/end must be known"); + assert(!LiveRegDefs[CallResource]); + assert(!LiveRegGens[CallResource]); + ++NumLiveRegs; + LiveRegDefs[CallResource] = SU; + LiveRegGens[CallResource] = SeqEnd; + } + } + + // Release the special call resource dependence, if this is the end + // of a call. 
+  if (LiveRegGens[CallResource] == SU)
+    for (const SDNode *SUNode = SU->getNode(); SUNode;
+         SUNode = SUNode->getGluedNode()) {
+      if (SUNode->isMachineOpcode() &&
+          SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
+        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+        assert(LiveRegDefs[CallResource]);
+        assert(LiveRegGens[CallResource]);
+        --NumLiveRegs;
+        LiveRegDefs[CallResource] = nullptr;
+        LiveRegGens[CallResource] = nullptr;
+        releaseInterferences(CallResource);
+      }
+    }
+
+  for (auto &Succ : SU->Succs) {
+    if (Succ.isAssignedRegDep()) {
+      auto Reg = Succ.getReg();
+      if (!LiveRegDefs[Reg])
+        ++NumLiveRegs;
+      // This becomes the nearest def. Note that an earlier def may still be
+      // pending if this is a two-address node.
+      LiveRegDefs[Reg] = SU;
+
+      // Update LiveRegGens only if it was empty before this unscheduling.
+      // This avoids incorrectly overwriting an entry that was already set by
+      // a previous iteration.
+      if (!LiveRegGens[Reg]) {
+        // Find the successor with the lowest height.
+        LiveRegGens[Reg] = Succ.getSUnit();
+        for (auto &Succ2 : SU->Succs) {
+          if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
+              Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
+            LiveRegGens[Reg] = Succ2.getSUnit();
+        }
+      }
+    }
+  }
+  if (SU->getHeight() < MinAvailableCycle)
+    MinAvailableCycle = SU->getHeight();
+
+  SU->setHeightDirty();
+  SU->isScheduled = false;
+  SU->isAvailable = true;
+  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
+    // Don't make available until backtracking is complete.
+    SU->isPending = true;
+    PendingQueue.push_back(SU);
+  }
+  else {
+    AvailableQueue->push(SU);
+  }
+  AvailableQueue->unscheduledNode(SU);
+}
+
+/// After backtracking, the hazard checker needs to be restored to a state
+/// corresponding to the current cycle.
+void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
+  HazardRec->Reset();
+
+  unsigned LookAhead = std::min((unsigned)Sequence.size(),
+                                HazardRec->getMaxLookAhead());
+  if (LookAhead == 0)
+    return;
+
+  std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
+  unsigned HazardCycle = (*I)->getHeight();
+  for (auto E = Sequence.end(); I != E; ++I) {
+    SUnit *SU = *I;
+    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
+      HazardRec->RecedeCycle();
+    }
+    EmitNode(SU);
+  }
+}
+
+/// BacktrackBottomUp - Backtrack scheduling, unscheduling nodes back to and
+/// including BtSU, in order to make it possible to schedule SU.
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
+  SUnit *OldSU = Sequence.back();
+  while (true) {
+    Sequence.pop_back();
+    // FIXME: use ready cycle instead of height
+    CurCycle = OldSU->getHeight();
+    UnscheduleNodeBottomUp(OldSU);
+    AvailableQueue->setCurCycle(CurCycle);
+    if (OldSU == BtSU)
+      break;
+    OldSU = Sequence.back();
+  }
+
+  assert(!SU->isSucc(OldSU) && "Something is wrong!");
+
+  RestoreHazardCheckerBottomUp();
+
+  ReleasePending();
+
+  ++NumBacktracks;
+}
+
+static bool isOperandOf(const SUnit *SU, SDNode *N) {
+  for (const SDNode *SUNode = SU->getNode(); SUNode;
+       SUNode = SUNode->getGluedNode()) {
+    if (SUNode->isOperandOf(N))
+      return true;
+  }
+  return false;
+}
+
+/// TryUnfoldSU - Attempt to unfold the folded load out of SU's node. Returns
+/// the unfolded SUnit on success, SU itself if unfolding would not help, or
+/// null on failure.
+SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
+  SDNode *N = SU->getNode();
+  // Use while over if to ease fall through.
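+  // unfoldMemoryOperand splits a load-folding instruction back into an
+  // independent load plus the operation itself.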
+  SmallVector<SDNode *, 2> NewNodes;
+  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+    return nullptr;
+
+  // Unfolding an x86 DEC64m operation results in a store, dec, and load,
+  // which can't be handled here, so just bail out.
+  if (NewNodes.size() == 3)
+    return nullptr;
+
+  assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+  N = NewNodes[1];
+  SDNode *LoadNode = NewNodes[0];
+  unsigned NumVals = N->getNumValues();
+  unsigned OldNumVals = SU->getNode()->getNumValues();
+
+  // LoadNode may already exist. This can happen when there is another
+  // load from the same location that produces the same type of value
+  // but with different alignment or volatility.
+  bool isNewLoad = true;
+  SUnit *LoadSU;
+  if (LoadNode->getNodeId() != -1) {
+    LoadSU = &SUnits[LoadNode->getNodeId()];
+    // If LoadSU has already been scheduled, we should clone it but
+    // this would negate the benefit of unfolding so just return SU.
+    if (LoadSU->isScheduled)
+      return SU;
+    isNewLoad = false;
+  } else {
+    LoadSU = CreateNewSUnit(LoadNode);
+    LoadNode->setNodeId(LoadSU->NodeNum);
+
+    InitNumRegDefsLeft(LoadSU);
+    computeLatency(LoadSU);
+  }
+
+  bool isNewN = true;
+  SUnit *NewSU;
+  // This can only happen when isNewLoad is false.
+  if (N->getNodeId() != -1) {
+    NewSU = &SUnits[N->getNodeId()];
+    // If NewSU has already been scheduled, we need to clone it, but this
+    // negates the benefit of unfolding so just return SU.
+    if (NewSU->isScheduled) {
+      return SU;
+    }
+    isNewN = false;
+  } else {
+    NewSU = CreateNewSUnit(N);
+    N->setNodeId(NewSU->NodeNum);
+
+    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+        NewSU->isTwoAddress = true;
+        break;
+      }
+    }
+    if (MCID.isCommutable())
+      NewSU->isCommutable = true;
+
+    InitNumRegDefsLeft(NewSU);
+    computeLatency(NewSU);
+  }
+
+  LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
+
+  // Now that we are committed to unfolding, replace the DAG uses.
+  for (unsigned i = 0; i != NumVals; ++i)
+    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
+                                 SDValue(LoadNode, 1));
+
+  // Record all the edges to and from the old SU, by category.
+  SmallVector<SDep, 4> ChainPreds;
+  SmallVector<SDep, 4> ChainSuccs;
+  SmallVector<SDep, 4> LoadPreds;
+  SmallVector<SDep, 4> NodePreds;
+  SmallVector<SDep, 4> NodeSuccs;
+  for (SDep &Pred : SU->Preds) {
+    if (Pred.isCtrl())
+      ChainPreds.push_back(Pred);
+    else if (isOperandOf(Pred.getSUnit(), LoadNode))
+      LoadPreds.push_back(Pred);
+    else
+      NodePreds.push_back(Pred);
+  }
+  for (SDep &Succ : SU->Succs) {
+    if (Succ.isCtrl())
+      ChainSuccs.push_back(Succ);
+    else
+      NodeSuccs.push_back(Succ);
+  }
+
+  // Now assign edges to the newly-created nodes.
+  for (const SDep &Pred : ChainPreds) {
+    RemovePred(SU, Pred);
+    if (isNewLoad)
+      AddPredQueued(LoadSU, Pred);
+  }
+  for (const SDep &Pred : LoadPreds) {
+    RemovePred(SU, Pred);
+    if (isNewLoad)
+      AddPredQueued(LoadSU, Pred);
+  }
+  for (const SDep &Pred : NodePreds) {
+    RemovePred(SU, Pred);
+    AddPredQueued(NewSU, Pred);
+  }
+  for (SDep D : NodeSuccs) {
+    SUnit *SuccDep = D.getSUnit();
+    D.setSUnit(SU);
+    RemovePred(SuccDep, D);
+    D.setSUnit(NewSU);
+    AddPredQueued(SuccDep, D);
+    // Balance register pressure.
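+    // A scheduled data successor has already consumed one of the defs that
+    // used to be attributed to SU, so one fewer of NewSU's defs remains live.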
+    if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
+        !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+      --NewSU->NumRegDefsLeft;
+  }
+  for (SDep D : ChainSuccs) {
+    SUnit *SuccDep = D.getSUnit();
+    D.setSUnit(SU);
+    RemovePred(SuccDep, D);
+    if (isNewLoad) {
+      D.setSUnit(LoadSU);
+      AddPredQueued(SuccDep, D);
+    }
+  }
+
+  // Add a data dependency to reflect that NewSU reads the value defined
+  // by LoadSU.
+  SDep D(LoadSU, SDep::Data, 0);
+  D.setLatency(LoadSU->Latency);
+  AddPredQueued(NewSU, D);
+
+  if (isNewLoad)
+    AvailableQueue->addNode(LoadSU);
+  if (isNewN)
+    AvailableQueue->addNode(NewSU);
+
+  ++NumUnfolds;
+
+  if (NewSU->NumSuccsLeft == 0)
+    NewSU->isAvailable = true;
+
+  return NewSU;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
+  SDNode *N = SU->getNode();
+  if (!N)
+    return nullptr;
+
+  LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
+  LLVM_DEBUG(dumpNode(*SU));
+
+  if (N->getGluedNode() &&
+      !TII->canCopyGluedNodeDuringSchedule(N)) {
+    LLVM_DEBUG(
+        dbgs()
+        << "Giving up because it has incoming glue and the target does not "
+           "want to copy it\n");
+    return nullptr;
+  }
+
+  SUnit *NewSU;
+  bool TryUnfold = false;
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+    MVT VT = N->getSimpleValueType(i);
+    if (VT == MVT::Glue) {
+      LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
+      return nullptr;
+    } else if (VT == MVT::Other)
+      TryUnfold = true;
+  }
+  for (const SDValue &Op : N->op_values()) {
+    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+    if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
+      LLVM_DEBUG(
+          dbgs() << "Giving up because one of the operands is glue and "
+                    "the target does not want to copy it\n");
+      return nullptr;
+    }
+  }
+
+  // If possible, unfold the instruction.
+  if (TryUnfold) {
+    SUnit *UnfoldSU = TryUnfoldSU(SU);
+    if (!UnfoldSU)
+      return nullptr;
+    SU = UnfoldSU;
+    N = SU->getNode();
+    // If this can be scheduled, don't bother duplicating; just return it.
+    if (SU->NumSuccsLeft == 0)
+      return SU;
+  }
+
+  LLVM_DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
+  NewSU = CreateClone(SU);
+
+  // The new SUnit has exactly the same predecessors.
+  for (SDep &Pred : SU->Preds)
+    if (!Pred.isArtificial())
+      AddPredQueued(NewSU, Pred);
+
+  // Make sure the clone comes after the original. (InstrEmitter assumes
+  // this ordering.)
+  AddPredQueued(NewSU, SDep(SU, SDep::Artificial));
+
+  // Only copy scheduled successors. Cut them from the old node's successor
+  // list and move them over.
+  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+  for (SDep &Succ : SU->Succs) {
+    if (Succ.isArtificial())
+      continue;
+    SUnit *SuccSU = Succ.getSUnit();
+    if (SuccSU->isScheduled) {
+      SDep D = Succ;
+      D.setSUnit(NewSU);
+      AddPredQueued(SuccSU, D);
+      D.setSUnit(SU);
+      DelDeps.push_back(std::make_pair(SuccSU, D));
+    }
+  }
+  for (auto &DelDep : DelDeps)
+    RemovePred(DelDep.first, DelDep.second);
+
+  AvailableQueue->updateNode(SU);
+  AvailableQueue->addNode(NewSU);
+
+  ++NumDups;
+  return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
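+/// The value is shuttled through the cross-copy register class: a copy out
+/// of SU's register class into DestRC (CopyFromSU), then a copy back into
+/// SrcRC (CopyToSU) that the moved successors read. As an illustrative
+/// sketch, on some targets a status-flags value that must survive a clobber
+/// is saved into a general-purpose register and restored afterwards; the
+/// classes actually used come from TRI->getCrossCopyRegClass.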
+void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+                                              const TargetRegisterClass *DestRC,
+                                              const TargetRegisterClass *SrcRC,
+                                              SmallVectorImpl<SUnit*> &Copies) {
+  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
+  CopyFromSU->CopySrcRC = SrcRC;
+  CopyFromSU->CopyDstRC = DestRC;
+
+  SUnit *CopyToSU = CreateNewSUnit(nullptr);
+  CopyToSU->CopySrcRC = DestRC;
+  CopyToSU->CopyDstRC = SrcRC;
+
+  // Only copy scheduled successors. Cut them from the old node's successor
+  // list and move them over.
+  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+  for (SDep &Succ : SU->Succs) {
+    if (Succ.isArtificial())
+      continue;
+    SUnit *SuccSU = Succ.getSUnit();
+    if (SuccSU->isScheduled) {
+      SDep D = Succ;
+      D.setSUnit(CopyToSU);
+      AddPredQueued(SuccSU, D);
+      DelDeps.push_back(std::make_pair(SuccSU, Succ));
+    } else {
+      // Avoid scheduling the def-side copy before other successors. Otherwise
+      // we could introduce another physreg interference on the copy and
+      // continue inserting copies indefinitely.
+      AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
+    }
+  }
+  for (auto &DelDep : DelDeps)
+    RemovePred(DelDep.first, DelDep.second);
+
+  SDep FromDep(SU, SDep::Data, Reg);
+  FromDep.setLatency(SU->Latency);
+  AddPredQueued(CopyFromSU, FromDep);
+  SDep ToDep(CopyFromSU, SDep::Data, 0);
+  ToDep.setLatency(CopyFromSU->Latency);
+  AddPredQueued(CopyToSU, ToDep);
+
+  AvailableQueue->updateNode(SU);
+  AvailableQueue->addNode(CopyFromSU);
+  AvailableQueue->addNode(CopyToSU);
+  Copies.push_back(CopyFromSU);
+  Copies.push_back(CopyToSU);
+
+  ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+                                 const TargetInstrInfo *TII) {
+  unsigned NumRes;
+  if (N->getOpcode() == ISD::CopyFromReg) {
+    // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
+    NumRes = 1;
+  } else {
+    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+    assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+    NumRes = MCID.getNumDefs();
+    for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+      if (Reg == *ImpDef)
+        break;
+      ++NumRes;
+    }
+  }
+  return N->getSimpleValueType(NumRes);
+}
+
+/// CheckForLiveRegDef - Update RegAdded/LRegs if the specified register def
+/// of the specified SUnit clobbers any "live" registers.
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+                               SUnit **LiveRegDefs,
+                               SmallSet<unsigned, 4> &RegAdded,
+                               SmallVectorImpl<unsigned> &LRegs,
+                               const TargetRegisterInfo *TRI) {
+  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
+
+    // Check if Reg is live.
+    if (!LiveRegDefs[*AliasI]) continue;
+
+    // Allow multiple uses of the same def.
+    if (LiveRegDefs[*AliasI] == SU) continue;
+
+    // Add Reg to the set of interfering live regs.
+    if (RegAdded.insert(*AliasI).second) {
+      LRegs.push_back(*AliasI);
+    }
+  }
+}
+
+/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
+/// by RegMask, and add them to LRegs.
+static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
+                                     ArrayRef<SUnit*> LiveRegDefs,
+                                     SmallSet<unsigned, 4> &RegAdded,
+                                     SmallVectorImpl<unsigned> &LRegs) {
+  // Look at all live registers. Skip Reg0 and the special CallResource.
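+  // (A call's register mask typically marks all caller-saved registers as
+  // clobbered, so any live def in one of those registers shows up as an
+  // interference here.)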
+ for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { + if (!LiveRegDefs[i]) continue; + if (LiveRegDefs[i] == SU) continue; + if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; + if (RegAdded.insert(i).second) + LRegs.push_back(i); + } +} + +/// getNodeRegMask - Returns the register mask attached to an SDNode, if any. +static const uint32_t *getNodeRegMask(const SDNode *N) { + for (const SDValue &Op : N->op_values()) + if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode())) + return RegOp->getRegMask(); + return nullptr; +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. +/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. +bool ScheduleDAGRRList:: +DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { + if (NumLiveRegs == 0) + return false; + + SmallSet<unsigned, 4> RegAdded; + // If this node would clobber any "live" register, then it's not ready. + // + // If SU is the currently live definition of the same register that it uses, + // then we are free to schedule it. + for (SDep &Pred : SU->Preds) { + if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU) + CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(), + RegAdded, LRegs, TRI); + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { + if (Node->getOpcode() == ISD::INLINEASM || + Node->getOpcode() == ISD::INLINEASM_BR) { + // Inline asm can clobber physical defs. + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + ++i; // Skip the ID value. + if (InlineAsm::isRegDefKind(Flags) || + InlineAsm::isRegDefEarlyClobberKind(Flags) || + InlineAsm::isClobberKind(Flags)) { + // Check for def of register or earlyclobber register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + if (Register::isPhysicalRegister(Reg)) + CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); + } + } else + i += NumVals; + } + continue; + } + + if (!Node->isMachineOpcode()) + continue; + // If we're in the middle of scheduling a call, don't begin scheduling + // another call. Also, don't allow any physical registers to be live across + // the call. + if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { + // Check the special calling-sequence resource. + unsigned CallResource = TRI->getNumRegs(); + if (LiveRegDefs[CallResource]) { + SDNode *Gen = LiveRegGens[CallResource]->getNode(); + while (SDNode *Glued = Gen->getGluedNode()) + Gen = Glued; + if (!IsChainDependent(Gen, Node, 0, TII) && + RegAdded.insert(CallResource).second) + LRegs.push_back(CallResource); + } + } + if (const uint32_t *RegMask = getNodeRegMask(Node)) + CheckForLiveRegDefMasked(SU, RegMask, + makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()), + RegAdded, LRegs); + + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); + if (MCID.hasOptionalDef()) { + // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit. 
+ // This operand can be either a def of CPSR, if the S bit is set; or a use + // of %noreg. When the OptionalDef is set to a valid register, we need to + // handle it in the same way as an ImplicitDef. + for (unsigned i = 0; i < MCID.getNumDefs(); ++i) + if (MCID.OpInfo[i].isOptionalDef()) { + const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues()); + unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg(); + CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); + } + } + if (!MCID.ImplicitDefs) + continue; + for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) + CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); + } + + return !LRegs.empty(); +} + +void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = Interferences.size(); i > 0; --i) { + SUnit *SU = Interferences[i-1]; + LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); + if (Reg) { + SmallVectorImpl<unsigned> &LRegs = LRegsPos->second; + if (!is_contained(LRegs, Reg)) + continue; + } + SU->isPending = false; + // The interfering node may no longer be available due to backtracking. + // Furthermore, it may have been made available again, in which case it is + // now already in the AvailableQueue. + if (SU->isAvailable && !SU->NodeQueueId) { + LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n'); + AvailableQueue->push(SU); + } + if (i < Interferences.size()) + Interferences[i-1] = Interferences.back(); + Interferences.pop_back(); + LRegsMap.erase(LRegsPos); + } +} + +/// Return a node that can be scheduled in this cycle. Requirements: +/// (1) Ready: latency has been satisfied +/// (2) No Hazards: resources are available +/// (3) No Interferences: may unschedule to break register interferences. +SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { + SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop(); + auto FindAvailableNode = [&]() { + while (CurSU) { + SmallVector<unsigned, 4> LRegs; + if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) + break; + LLVM_DEBUG(dbgs() << " Interfering reg "; + if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource"; + else dbgs() << printReg(LRegs[0], TRI); + dbgs() << " SU #" << CurSU->NodeNum << '\n'); + std::pair<LRegsMapT::iterator, bool> LRegsPair = + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + if (LRegsPair.second) { + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + Interferences.push_back(CurSU); + } + else { + assert(CurSU->isPending && "Interferences are pending"); + // Update the interference with current live regs. + LRegsPair.first->second = LRegs; + } + CurSU = AvailableQueue->pop(); + } + }; + FindAvailableNode(); + if (CurSU) + return CurSU; + + // We query the topological order in the loop body, so make sure outstanding + // updates are applied before entering it (we only enter the loop if there + // are some interferences). If we make changes to the ordering, we exit + // the loop. + + // All candidates are delayed due to live physical reg dependencies. + // Try backtracking, code duplication, or inserting cross class copies + // to resolve it. + for (SUnit *TrySU : Interferences) { + SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; + + // Try unscheduling up to the point where it's safe to schedule + // this node. 
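+    // i.e. pop scheduled nodes off the sequence until the earliest-scheduled
+    // user (LiveRegGens) of an interfering register has been unscheduled, at
+    // which point the register is no longer live and TrySU can be placed.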
+    SUnit *BtSU = nullptr;
+    unsigned LiveCycle = std::numeric_limits<unsigned>::max();
+    for (unsigned Reg : LRegs) {
+      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
+        BtSU = LiveRegGens[Reg];
+        LiveCycle = BtSU->getHeight();
+      }
+    }
+    if (!WillCreateCycle(TrySU, BtSU)) {
+      // BacktrackBottomUp mutates Interferences!
+      BacktrackBottomUp(TrySU, BtSU);
+
+      // Force the current node to be scheduled before the node that
+      // requires the physical reg dep.
+      if (BtSU->isAvailable) {
+        BtSU->isAvailable = false;
+        if (!BtSU->isPending)
+          AvailableQueue->remove(BtSU);
+      }
+      LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
+                        << ") to SU(" << TrySU->NodeNum << ")\n");
+      AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
+
+      // If one or more successors have been unscheduled, then the current
+      // node is no longer available.
+      if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
+        LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
+        CurSU = AvailableQueue->pop();
+      } else {
+        LLVM_DEBUG(dbgs() << "TrySU available\n");
+        // Available and in AvailableQueue
+        AvailableQueue->remove(TrySU);
+        CurSU = TrySU;
+      }
+      FindAvailableNode();
+      // Interferences has been mutated. We must break.
+      break;
+    }
+  }
+
+  if (!CurSU) {
+    // Can't backtrack. If it's too expensive to copy the value, then try to
+    // duplicate the nodes that produce these "too expensive to copy" values
+    // to break the dependency. In case even that doesn't work, insert cross
+    // class copies.
+    // If it's not too expensive, i.e. cost != -1, issue copies.
+    SUnit *TrySU = Interferences[0];
+    SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
+    assert(LRegs.size() == 1 && "Can't handle this yet!");
+    unsigned Reg = LRegs[0];
+    SUnit *LRDef = LiveRegDefs[Reg];
+    MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+    const TargetRegisterClass *RC =
+        TRI->getMinimalPhysRegClass(Reg, VT);
+    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+    // If the cross copy register class is the same as RC, then it must be
+    // possible to copy the value directly. Do not try to duplicate the def.
+    // If the cross copy register class is not the same as RC, then it's
+    // possible to copy the value, but it requires cross register class
+    // copies and it is expensive.
+    // If the cross copy register class is null, then it's not possible to
+    // copy the value at all.
+    SUnit *NewDef = nullptr;
+    if (DestRC != RC) {
+      NewDef = CopyAndMoveSuccessors(LRDef);
+      if (!DestRC && !NewDef)
+        report_fatal_error("Can't handle live physical register dependency!");
+    }
+    if (!NewDef) {
+      // Issue copies; these can be expensive cross register class copies.
+      SmallVector<SUnit*, 2> Copies;
+      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+      LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
+                        << " to SU #" << Copies.front()->NodeNum << "\n");
+      AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
+      NewDef = Copies.back();
+    }
+
+    LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
+                      << " to SU #" << TrySU->NodeNum << "\n");
+    LiveRegDefs[Reg] = NewDef;
+    AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
+    TrySU->isAvailable = false;
+    CurSU = NewDef;
+  }
+  assert(CurSU && "Unable to resolve live physical register dependencies!");
+  return CurSU;
+}
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleBottomUp() {
+  // Release any predecessors of the special Exit node.
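+  // (ExitSU stands for the region boundary; releasing its predecessors,
+  // together with pushing the DAG root below, seeds the bottom-up ready
+  // list.)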
+ ReleasePredecessors(&ExitSU); + + // Add root to Available queue. + if (!SUnits.empty()) { + SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; + assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); + RootSU->isAvailable = true; + AvailableQueue->push(RootSU); + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty() || !Interferences.empty()) { + LLVM_DEBUG(dbgs() << "\nExamining Available:\n"; + AvailableQueue->dump(this)); + + // Pick the best node to schedule taking all constraints into + // consideration. + SUnit *SU = PickNodeToScheduleBottomUp(); + + AdvancePastStalls(SU); + + ScheduleNodeBottomUp(SU); + + while (AvailableQueue->empty() && !PendingQueue.empty()) { + // Advance the cycle to free resources. Skip ahead to the next ready SU. + assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() && + "MinAvailableCycle uninitialized"); + AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); + } + } + + // Reverse the order if it is bottom up. + std::reverse(Sequence.begin(), Sequence.end()); + +#ifndef NDEBUG + VerifyScheduledSequence(/*isBottomUp=*/true); +#endif +} + +namespace { + +class RegReductionPQBase; + +struct queue_sort { + bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } +}; + +#ifndef NDEBUG +template<class SF> +struct reverse_sort : public queue_sort { + SF &SortFunc; + + reverse_sort(SF &sf) : SortFunc(sf) {} + + bool operator()(SUnit* left, SUnit* right) const { + // reverse left/right rather than simply !SortFunc(left, right) + // to expose different paths in the comparison logic. + return SortFunc(right, left); + } +}; +#endif // NDEBUG + +/// bu_ls_rr_sort - Priority function for bottom up register pressure +// reduction scheduler. +struct bu_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + + RegReductionPQBase *SPQ; + + bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + + bool operator()(SUnit* left, SUnit* right) const; +}; + +// src_ls_rr_sort - Priority function for source order scheduler. +struct src_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + + RegReductionPQBase *SPQ; + + src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + + bool operator()(SUnit* left, SUnit* right) const; +}; + +// hybrid_ls_rr_sort - Priority function for hybrid scheduler. +struct hybrid_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + + RegReductionPQBase *SPQ; + + hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + + bool isReady(SUnit *SU, unsigned CurCycle) const; + + bool operator()(SUnit* left, SUnit* right) const; +}; + +// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) +// scheduler. +struct ilp_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + + RegReductionPQBase *SPQ; + + ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + + bool isReady(SUnit *SU, unsigned CurCycle) const; + + bool operator()(SUnit* left, SUnit* right) const; +}; + +class RegReductionPQBase : public SchedulingPriorityQueue { +protected: + std::vector<SUnit *> Queue; + unsigned CurQueueId = 0; + bool TracksRegPressure; + bool SrcOrder; + + // SUnits - The SUnits for the current graph. 
+ std::vector<SUnit> *SUnits; + + MachineFunction &MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const TargetLowering *TLI; + ScheduleDAGRRList *scheduleDAG = nullptr; + + // SethiUllmanNumbers - The SethiUllman number for each node. + std::vector<unsigned> SethiUllmanNumbers; + + /// RegPressure - Tracking current reg pressure per register class. + std::vector<unsigned> RegPressure; + + /// RegLimit - Tracking the number of allocatable registers per register + /// class. + std::vector<unsigned> RegLimit; + +public: + RegReductionPQBase(MachineFunction &mf, + bool hasReadyFilter, + bool tracksrp, + bool srcorder, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp), + SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) { + if (TracksRegPressure) { + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (const TargetRegisterClass *RC : TRI->regclasses()) + RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF); + } + } + + void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { + scheduleDAG = scheduleDag; + } + + ScheduleHazardRecognizer* getHazardRec() { + return scheduleDAG->getHazardRec(); + } + + void initNodes(std::vector<SUnit> &sunits) override; + + void addNode(const SUnit *SU) override; + + void updateNode(const SUnit *SU) override; + + void releaseState() override { + SUnits = nullptr; + SethiUllmanNumbers.clear(); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + } + + unsigned getNodePriority(const SUnit *SU) const; + + unsigned getNodeOrdering(const SUnit *SU) const { + if (!SU->getNode()) return 0; + + return SU->getNode()->getIROrder(); + } + + bool empty() const override { return Queue.empty(); } + + void push(SUnit *U) override { + assert(!U->NodeQueueId && "Node in the queue already"); + U->NodeQueueId = ++CurQueueId; + Queue.push_back(U); + } + + void remove(SUnit *SU) override { + assert(!Queue.empty() && "Queue is empty!"); + assert(SU->NodeQueueId != 0 && "Not in queue!"); + std::vector<SUnit *>::iterator I = llvm::find(Queue, SU); + if (I != std::prev(Queue.end())) + std::swap(*I, Queue.back()); + Queue.pop_back(); + SU->NodeQueueId = 0; + } + + bool tracksRegPressure() const override { return TracksRegPressure; } + + void dumpRegPressure() const; + + bool HighRegPressure(const SUnit *SU) const; + + bool MayReduceRegPressure(SUnit *SU) const; + + int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; + + void scheduledNode(SUnit *SU) override; + + void unscheduledNode(SUnit *SU) override; + +protected: + bool canClobber(const SUnit *SU, const SUnit *Op); + void AddPseudoTwoAddrDeps(); + void PrescheduleNodesWithMultipleUses(); + void CalculateSethiUllmanNumbers(); +}; + +template<class SF> +static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) { + std::vector<SUnit *>::iterator Best = Q.begin(); + for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != std::prev(Q.end())) + std::swap(*Best, Q.back()); + Q.pop_back(); + return V; +} + +template<class SF> +SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) { +#ifndef NDEBUG + if (DAG->StressSched) { + reverse_sort<SF> RPicker(Picker); + return popFromQueueImpl(Q, RPicker); + } +#endif + (void)DAG; + return 
popFromQueueImpl(Q, Picker); +} + +//===----------------------------------------------------------------------===// +// RegReductionPriorityQueue Definition +//===----------------------------------------------------------------------===// +// +// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers +// to reduce register pressure. +// +template<class SF> +class RegReductionPriorityQueue : public RegReductionPQBase { + SF Picker; + +public: + RegReductionPriorityQueue(MachineFunction &mf, + bool tracksrp, + bool srcorder, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, + tii, tri, tli), + Picker(this) {} + + bool isBottomUp() const override { return SF::IsBottomUp; } + + bool isReady(SUnit *U) const override { + return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); + } + + SUnit *pop() override { + if (Queue.empty()) return nullptr; + + SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); + V->NodeQueueId = 0; + return V; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override { + // Emulate pop() without clobbering NodeQueueIds. + std::vector<SUnit *> DumpQueue = Queue; + SF DumpPicker = Picker; + while (!DumpQueue.empty()) { + SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); + dbgs() << "Height " << SU->getHeight() << ": "; + DAG->dumpNode(*SU); + } + } +#endif +}; + +using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>; +using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>; +using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>; +using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Static Node Priority for Register Pressure Reduction +//===----------------------------------------------------------------------===// + +// Check for special nodes that bypass scheduling heuristics. +// Currently this pushes TokenFactor nodes down, but may be used for other +// pseudo-ops as well. +// +// Return -1 to schedule right above left, 1 for left above right. +// Return 0 if no bias exists. +static int checkSpecialNodes(const SUnit *left, const SUnit *right) { + bool LSchedLow = left->isScheduleLow; + bool RSchedLow = right->isScheduleLow; + if (LSchedLow != RSchedLow) + return LSchedLow < RSchedLow ? 1 : -1; + return 0; +} + +/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. +/// Smaller number is the higher priority. +static unsigned +CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { + if (SUNumbers[SU->NodeNum] != 0) + return SUNumbers[SU->NodeNum]; + + // Use WorkList to avoid stack overflow on excessively large IRs. + struct WorkState { + WorkState(const SUnit *SU) : SU(SU) {} + const SUnit *SU; + unsigned PredsProcessed = 0; + }; + + SmallVector<WorkState, 16> WorkList; + WorkList.push_back(SU); + while (!WorkList.empty()) { + auto &Temp = WorkList.back(); + auto *TempSU = Temp.SU; + bool AllPredsKnown = true; + // Try to find a non-evaluated pred and push it into the processing stack. 
+ for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) { + auto &Pred = TempSU->Preds[P]; + if (Pred.isCtrl()) continue; // ignore chain preds + SUnit *PredSU = Pred.getSUnit(); + if (SUNumbers[PredSU->NodeNum] == 0) { +#ifndef NDEBUG + // In debug mode, check that we don't have such element in the stack. + for (auto It : WorkList) + assert(It.SU != PredSU && "Trying to push an element twice?"); +#endif + // Next time start processing this one starting from the next pred. + Temp.PredsProcessed = P + 1; + WorkList.push_back(PredSU); + AllPredsKnown = false; + break; + } + } + + if (!AllPredsKnown) + continue; + + // Once all preds are known, we can calculate the answer for this one. + unsigned SethiUllmanNumber = 0; + unsigned Extra = 0; + for (const SDep &Pred : TempSU->Preds) { + if (Pred.isCtrl()) continue; // ignore chain preds + SUnit *PredSU = Pred.getSUnit(); + unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum]; + assert(PredSethiUllman > 0 && "We should have evaluated this pred!"); + if (PredSethiUllman > SethiUllmanNumber) { + SethiUllmanNumber = PredSethiUllman; + Extra = 0; + } else if (PredSethiUllman == SethiUllmanNumber) + ++Extra; + } + + SethiUllmanNumber += Extra; + if (SethiUllmanNumber == 0) + SethiUllmanNumber = 1; + SUNumbers[TempSU->NodeNum] = SethiUllmanNumber; + WorkList.pop_back(); + } + + assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!"); + return SUNumbers[SU->NodeNum]; +} + +/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all +/// scheduling units. +void RegReductionPQBase::CalculateSethiUllmanNumbers() { + SethiUllmanNumbers.assign(SUnits->size(), 0); + + for (const SUnit &SU : *SUnits) + CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers); +} + +void RegReductionPQBase::addNode(const SUnit *SU) { + unsigned SUSize = SethiUllmanNumbers.size(); + if (SUnits->size() > SUSize) + SethiUllmanNumbers.resize(SUSize*2, 0); + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); +} + +void RegReductionPQBase::updateNode(const SUnit *SU) { + SethiUllmanNumbers[SU->NodeNum] = 0; + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); +} + +// Lower priority means schedule further down. For bottom-up scheduling, lower +// priority SUs are scheduled before higher priority SUs. +unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { + assert(SU->NodeNum < SethiUllmanNumbers.size()); + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. + return 0; + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::INSERT_SUBREG) + // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be + // close to their uses to facilitate coalescing. + return 0; + if (SU->NumSuccs == 0 && SU->NumPreds != 0) + // If SU does not have a register use, i.e. it doesn't produce a value + // that would be consumed (e.g. store), then it terminates a chain of + // computation. Give it a large SethiUllman number so it will be + // scheduled right before its predecessors that it doesn't lengthen + // their live ranges. + return 0xffff; + if (SU->NumPreds == 0 && SU->NumSuccs != 0) + // If SU does not have a register def, schedule it close to its uses + // because it does not lengthen any live ranges. 
+ return 0; +#if 1 + return SethiUllmanNumbers[SU->NodeNum]; +#else + unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; + if (SU->isCallOp) { + // FIXME: This assumes all of the defs are used as call operands. + int NP = (int)Priority - SU->getNode()->getNumValues(); + return (NP > 0) ? NP : 0; + } + return Priority; +#endif +} + +//===----------------------------------------------------------------------===// +// Register Pressure Tracking +//===----------------------------------------------------------------------===// + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const { + for (const TargetRegisterClass *RC : TRI->regclasses()) { + unsigned Id = RC->getID(); + unsigned RP = RegPressure[Id]; + if (!RP) continue; + LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / " + << RegLimit[Id] << '\n'); + } +} +#endif + +bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { + if (!TLI) + return false; + + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) + continue; + SUnit *PredSU = Pred.getSUnit(); + // NumRegDefsLeft is zero when enough uses of this node have been scheduled + // to cover the number of registers defined (they are all live). + if (PredSU->NumRegDefsLeft == 0) { + continue; + } + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance()) { + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); + + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; + } + } + return false; +} + +bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { + const SDNode *N = SU->getNode(); + + if (!N->isMachineOpcode() || !SU->NumSuccs) + return false; + + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + MVT VT = N->getSimpleValueType(i); + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + return true; + } + return false; +} + +// Compute the register pressure contribution by this instruction by count up +// for uses that are not live and down for defs. Only count register classes +// that are already under high pressure. As a side effect, compute the number of +// uses of registers that are already live. +// +// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure +// so could probably be factored. +int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { + LiveUses = 0; + int PDiff = 0; + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) + continue; + SUnit *PredSU = Pred.getSUnit(); + // NumRegDefsLeft is zero when enough uses of this node have been scheduled + // to cover the number of registers defined (they are all live). 
+ if (PredSU->NumRegDefsLeft == 0) { + if (PredSU->getNode()->isMachineOpcode()) + ++LiveUses; + continue; + } + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance()) { + MVT VT = RegDefPos.GetValue(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + ++PDiff; + } + } + const SDNode *N = SU->getNode(); + + if (!N || !N->isMachineOpcode() || !SU->NumSuccs) + return PDiff; + + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + MVT VT = N->getSimpleValueType(i); + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + --PDiff; + } + return PDiff; +} + +void RegReductionPQBase::scheduledNode(SUnit *SU) { + if (!TracksRegPressure) + return; + + if (!SU->getNode()) + return; + + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) + continue; + SUnit *PredSU = Pred.getSUnit(); + // NumRegDefsLeft is zero when enough uses of this node have been scheduled + // to cover the number of registers defined (they are all live). + if (PredSU->NumRegDefsLeft == 0) { + continue; + } + // FIXME: The ScheduleDAG currently loses information about which of a + // node's values is consumed by each dependence. Consequently, if the node + // defines multiple register classes, we don't know which to pressurize + // here. Instead the following loop consumes the register defs in an + // arbitrary order. At least it handles the common case of clustered loads + // to the same class. For precise liveness, each SDep needs to indicate the + // result number. But that tightly couples the ScheduleDAG with the + // SelectionDAG making updates tricky. A simpler hack would be to attach a + // value type or register class to SDep. + // + // The most important aspect of register tracking is balancing the increase + // here with the reduction further below. Note that this SU may use multiple + // defs in PredSU. The can't be determined here, but we've already + // compensated by reducing NumRegDefsLeft in PredSU during + // ScheduleDAGSDNodes::AddSchedEdges. + --PredSU->NumRegDefsLeft; + unsigned SkipRegDefs = PredSU->NumRegDefsLeft; + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { + if (SkipRegDefs) + continue; + + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); + RegPressure[RCId] += Cost; + break; + } + } + + // We should have this assert, but there may be dead SDNodes that never + // materialize as SUnits, so they don't appear to generate liveness. + //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); + int SkipRegDefs = (int)SU->NumRegDefsLeft; + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { + if (SkipRegDefs > 0) + continue; + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); + if (RegPressure[RCId] < Cost) { + // Register pressure tracking is imprecise. This can happen. But we try + // hard not to let it happen because it likely results in poor scheduling. 
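+      // (The imprecision stems from SDep edges not recording which result
+      // number they consume; see the FIXME above.)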
+ LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum + << ") has too many regdefs\n"); + RegPressure[RCId] = 0; + } + else { + RegPressure[RCId] -= Cost; + } + } + LLVM_DEBUG(dumpRegPressure()); +} + +void RegReductionPQBase::unscheduledNode(SUnit *SU) { + if (!TracksRegPressure) + return; + + const SDNode *N = SU->getNode(); + if (!N) return; + + if (!N->isMachineOpcode()) { + if (N->getOpcode() != ISD::CopyToReg) + return; + } else { + unsigned Opc = N->getMachineOpcode(); + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::REG_SEQUENCE || + Opc == TargetOpcode::IMPLICIT_DEF) + return; + } + + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) + continue; + SUnit *PredSU = Pred.getSUnit(); + // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only + // counts data deps. + if (PredSU->NumSuccsLeft != PredSU->Succs.size()) + continue; + const SDNode *PN = PredSU->getNode(); + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyFromReg) { + MVT VT = PN->getSimpleValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + continue; + } + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) + continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG || + POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + MVT VT = PN->getSimpleValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } + unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + MVT VT = PN->getSimpleValueType(i); + if (!PN->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) + // Register pressure tracking is imprecise. This can happen. + RegPressure[RCId] = 0; + else + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } + } + + // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() + // may transfer data dependencies to CopyToReg. + if (SU->NumSuccs && N->isMachineOpcode()) { + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getSimpleValueType(i); + if (VT == MVT::Glue || VT == MVT::Other) + continue; + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + } + + LLVM_DEBUG(dumpRegPressure()); +} + +//===----------------------------------------------------------------------===// +// Dynamic Node Priority for Register Pressure Reduction +//===----------------------------------------------------------------------===// + +/// closestSucc - Returns the scheduled cycle of the successor which is +/// closest to the current cycle. +static unsigned closestSucc(const SUnit *SU) { + unsigned MaxHeight = 0; + for (const SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) continue; // ignore chain succs + unsigned Height = Succ.getSUnit()->getHeight(); + // If there are bunch of CopyToRegs stacked up, they should be considered + // to be at the same position. 
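+    // (e.g. for a chain t -> CopyToReg -> CopyToReg -> use, the recursion
+    // below measures height past the whole copy chain rather than stopping
+    // at the first copy.)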
+    if (Succ.getSUnit()->getNode() &&
+        Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+      Height = closestSucc(Succ.getSUnit())+1;
+    if (Height > MaxHeight)
+      MaxHeight = Height;
+  }
+  return MaxHeight;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. the number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+  unsigned Scratches = 0;
+  for (const SDep &Pred : SU->Preds) {
+    if (Pred.isCtrl()) continue; // ignore chain preds
+    Scratches++;
+  }
+  return Scratches;
+}
+
+/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
+/// CopyFromReg from a virtual register.
+static bool hasOnlyLiveInOpers(const SUnit *SU) {
+  bool RetVal = false;
+  for (const SDep &Pred : SU->Preds) {
+    if (Pred.isCtrl()) continue;
+    const SUnit *PredSU = Pred.getSUnit();
+    if (PredSU->getNode() &&
+        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
+      unsigned Reg =
+          cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
+      if (Register::isVirtualRegister(Reg)) {
+        RetVal = true;
+        continue;
+      }
+    }
+    return false;
+  }
+  return RetVal;
+}
+
+/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
+/// CopyToReg to a virtual register. This SU def is probably a liveout and
+/// has no other use. It should be scheduled closer to the terminator.
+static bool hasOnlyLiveOutUses(const SUnit *SU) {
+  bool RetVal = false;
+  for (const SDep &Succ : SU->Succs) {
+    if (Succ.isCtrl()) continue;
+    const SUnit *SuccSU = Succ.getSUnit();
+    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
+      unsigned Reg =
+          cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+      if (Register::isVirtualRegister(Reg)) {
+        RetVal = true;
+        continue;
+      }
+    }
+    return false;
+  }
+  return RetVal;
+}
+
+// Set isVRegCycle for a node with only live-in operands and live-out uses.
+// Also set isVRegCycle for its CopyFromReg operands.
+//
+// This is only relevant for single-block loops, in which case the VRegCycle
+// node is likely an induction variable whose operand and target virtual
+// registers should be coalesced (e.g. pre/post increment values). Setting the
+// isVRegCycle flag helps the scheduler prioritize other uses of the same
+// CopyFromReg so that this node becomes the virtual register "kill". This
+// avoids interference between the values live in and out of the block and
+// eliminates a copy inside the loop.
+static void initVRegCycle(SUnit *SU) {
+  if (DisableSchedVRegCycle)
+    return;
+
+  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
+    return;
+
+  LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
+
+  SU->isVRegCycle = true;
+
+  for (const SDep &Pred : SU->Preds) {
+    if (Pred.isCtrl()) continue;
+    Pred.getSUnit()->isVRegCycle = true;
+  }
+}
+
+// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag
+// of CopyFromReg operands. We should no longer penalize other uses of this
+// VReg.
+static void resetVRegCycle(SUnit *SU) {
+  if (!SU->isVRegCycle)
+    return;
+
+  for (const SDep &Pred : SU->Preds) {
+    if (Pred.isCtrl()) continue;  // ignore chain preds
+    SUnit *PredSU = Pred.getSUnit();
+    if (PredSU->isVRegCycle) {
+      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
+             "VRegCycle def must be CopyFromReg");
+      Pred.getSUnit()->isVRegCycle = false;
+    }
+  }
+}
+
+// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle.
+// This means a node that defines the VRegCycle has not been scheduled yet.
+static bool hasVRegCycleUse(const SUnit *SU) {
+  // If this SU also defines the VReg, don't hoist it as a "use".
+  if (SU->isVRegCycle)
+    return false;
+
+  for (const SDep &Pred : SU->Preds) {
+    if (Pred.isCtrl()) continue;  // ignore chain preds
+    if (Pred.getSUnit()->isVRegCycle &&
+        Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
+      LLVM_DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
+      return true;
+    }
+  }
+  return false;
+}
+
+// Check for either a dependence (latency) or resource (hazard) stall.
+//
+// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
+  if ((int)SPQ->getCurCycle() < Height) return true;
+  if (SPQ->getHazardRec()->getHazardType(SU, 0)
+      != ScheduleHazardRecognizer::NoHazard)
+    return true;
+  return false;
+}
+
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
+                            RegReductionPQBase *SPQ) {
+  // Scheduling an instruction that uses a VReg whose postincrement has not yet
+  // been scheduled will induce a copy. Model this as an extra cycle of latency.
+  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
+  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
+  int LHeight = (int)left->getHeight() + LPenalty;
+  int RHeight = (int)right->getHeight() + RPenalty;
+
+  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
+                BUHasStall(left, LHeight, SPQ);
+  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
+                BUHasStall(right, RHeight, SPQ);
+
+  // If scheduling one of the nodes will cause a pipeline stall, delay it.
+  // If scheduling either one will cause a pipeline stall, sort them
+  // according to their height.
+  if (LStall) {
+    if (!RStall)
+      return 1;
+    if (LHeight != RHeight)
+      return LHeight > RHeight ? 1 : -1;
+  } else if (RStall)
+    return -1;
+
+  // If either node is scheduling for latency, sort them by height/depth
+  // and latency.
+  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
+                     right->SchedulingPref == Sched::ILP)) {
+    // If neither instruction stalls (!LStall && !RStall) and the
+    // HazardRecognizer is enabled, grouping instructions by cycle, then its
+    // height is already covered so only its depth matters. We also reach
+    // this point if both stall but have the same height.
+    if (!SPQ->getHazardRec()->isEnabled()) {
+      if (LHeight != RHeight)
+        return LHeight > RHeight ? 1 : -1;
+    }
+    int LDepth = left->getDepth() - LPenalty;
+    int RDepth = right->getDepth() - RPenalty;
+    if (LDepth != RDepth) {
+      LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
+                        << ") depth " << LDepth << " vs SU (" << right->NodeNum
+                        << ") depth " << RDepth << "\n");
+      return LDepth < RDepth ? 1 : -1;
+    }
+    if (left->Latency != right->Latency)
+      return left->Latency > right->Latency ? 1 : -1;
+  }
+  return 0;
+}
+
+static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
+  // Schedule physical register definitions close to their use. This is
+  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
+  // long as shortening physreg live ranges is generally good, we can defer
+  // creating a subtarget hook.
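+  // For example (illustrative), an x86 CMP that defines EFLAGS and the
+  // dependent conditional branch can be macro-fused when adjacent, so
+  // keeping the flags def next to its use both enables fusion and keeps the
+  // physreg live range short.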
+  if (!DisableSchedPhysRegJoin) {
+    bool LHasPhysReg = left->hasPhysRegDefs;
+    bool RHasPhysReg = right->hasPhysRegDefs;
+    if (LHasPhysReg != RHasPhysReg) {
+      #ifndef NDEBUG
+      static const char *const PhysRegMsg[] = { " has no physreg",
+                                                " defines a physreg" };
+      #endif
+      LLVM_DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
+                        << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
+                        << ") " << PhysRegMsg[RHasPhysReg] << "\n");
+      return LHasPhysReg < RHasPhysReg;
+    }
+  }
+
+  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
+  unsigned LPriority = SPQ->getNodePriority(left);
+  unsigned RPriority = SPQ->getNodePriority(right);
+
+  // Be really careful about hoisting call operands above previous calls.
+  // Only allow it if it would reduce register pressure.
+  if (left->isCall && right->isCallOp) {
+    unsigned RNumVals = right->getNode()->getNumValues();
+    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
+  }
+  if (right->isCall && left->isCallOp) {
+    unsigned LNumVals = left->getNode()->getNumValues();
+    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
+  }
+
+  if (LPriority != RPriority)
+    return LPriority > RPriority;
+
+  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
+  // the same, then keep the source order.
+  if (left->isCall || right->isCall) {
+    unsigned LOrder = SPQ->getNodeOrdering(left);
+    unsigned ROrder = SPQ->getNodeOrdering(right);
+
+    // Prefer an ordering where the lower the non-zero order number, the higher
+    // the preference.
+    if ((LOrder || ROrder) && LOrder != ROrder)
+      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+  }
+
+  // Try to schedule def + use closer together when Sethi-Ullman numbers are
+  // the same.
+  // e.g.
+  // t1 = op t2, c1
+  // t3 = op t4, c2
+  //
+  // and the following instructions are both ready.
+  // t2 = op c3
+  // t4 = op c4
+  //
+  // Then schedule t2 = op first.
+  // i.e.
+  // t4 = op c4
+  // t2 = op c3
+  // t1 = op t2, c1
+  // t3 = op t4, c2
+  //
+  // This creates more short live intervals.
+  unsigned LDist = closestSucc(left);
+  unsigned RDist = closestSucc(right);
+  if (LDist != RDist)
+    return LDist < RDist;
+
+  // How many registers become live when the node is scheduled.
+  unsigned LScratch = calcMaxScratches(left);
+  unsigned RScratch = calcMaxScratches(right);
+  if (LScratch != RScratch)
+    return LScratch > RScratch;
+
+  // Comparing latency against a call makes little sense unless the node
+  // is register pressure-neutral.
+  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
+    return (left->NodeQueueId > right->NodeQueueId);
+
+  // Do not compare latencies when one or both of the nodes are calls.
+  if (!DisableSchedCycles &&
+      !(left->isCall || right->isCall)) {
+    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
+    if (result != 0)
+      return result > 0;
+  } else {
+    if (left->getHeight() != right->getHeight())
+      return left->getHeight() > right->getHeight();
+
+    if (left->getDepth() != right->getDepth())
+      return left->getDepth() < right->getDepth();
+  }
+
+  assert(left->NodeQueueId && right->NodeQueueId &&
+         "NodeQueueId cannot be zero");
+  return (left->NodeQueueId > right->NodeQueueId);
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+  if (int res = checkSpecialNodes(left, right))
+    return res > 0;
+
+  return BURRSort(left, right, SPQ);
+}
+
+// Source order, otherwise bottom up.
+bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + + unsigned LOrder = SPQ->getNodeOrdering(left); + unsigned ROrder = SPQ->getNodeOrdering(right); + + // Prefer an ordering where the lower the non-zero order number, the higher + // the preference. + if ((LOrder || ROrder) && LOrder != ROrder) + return LOrder != 0 && (LOrder < ROrder || ROrder == 0); + + return BURRSort(left, right, SPQ); +} + +// If the time between now and when the instruction will be ready can cover +// the spill code, then avoid adding it to the ready queue. This gives long +// stalls highest priority and allows hoisting across calls. It should also +// speed up processing the available queue. +bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { + static const unsigned ReadyDelay = 3; + + if (SPQ->MayReduceRegPressure(SU)) return true; + + if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; + + if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) + != ScheduleHazardRecognizer::NoHazard) + return false; + + return true; +} + +// Return true if right should be scheduled with higher priority than left. +bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + + if (left->isCall || right->isCall) + // No way to compute latency of calls. + return BURRSort(left, right, SPQ); + + bool LHigh = SPQ->HighRegPressure(left); + bool RHigh = SPQ->HighRegPressure(right); + // Avoid causing spills. If register pressure is high, schedule for + // register pressure reduction. + if (LHigh && !RHigh) { + LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" + << right->NodeNum << ")\n"); + return true; + } + else if (!LHigh && RHigh) { + LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" + << left->NodeNum << ")\n"); + return false; + } + if (!LHigh && !RHigh) { + int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); + if (result != 0) + return result > 0; + } + return BURRSort(left, right, SPQ); +} + +// Schedule as many instructions in each cycle as possible. So don't make an +// instruction available unless it is ready in the current cycle. +bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { + if (SU->getHeight() > CurCycle) return false; + + if (SPQ->getHazardRec()->getHazardType(SU, 0) + != ScheduleHazardRecognizer::NoHazard) + return false; + + return true; +} + +static bool canEnableCoalescing(SUnit *SU) { + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. + return true; + + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::INSERT_SUBREG) + // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be + // close to their uses to facilitate coalescing. + return true; + + if (SU->NumPreds == 0 && SU->NumSuccs != 0) + // If SU does not have a register def, schedule it close to its uses + // because it does not lengthen any live ranges. + return true; + + return false; +} + +// list-ilp is currently an experimental scheduler that allows various +// heuristics to be enabled prior to the normal register reduction logic. 
+bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + + if (left->isCall || right->isCall) + // No way to compute latency of calls. + return BURRSort(left, right, SPQ); + + unsigned LLiveUses = 0, RLiveUses = 0; + int LPDiff = 0, RPDiff = 0; + if (!DisableSchedRegPressure || !DisableSchedLiveUses) { + LPDiff = SPQ->RegPressureDiff(left, LLiveUses); + RPDiff = SPQ->RegPressureDiff(right, RLiveUses); + } + if (!DisableSchedRegPressure && LPDiff != RPDiff) { + LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum + << "): " << LPDiff << " != SU(" << right->NodeNum + << "): " << RPDiff << "\n"); + return LPDiff > RPDiff; + } + + if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { + bool LReduce = canEnableCoalescing(left); + bool RReduce = canEnableCoalescing(right); + if (LReduce && !RReduce) return false; + if (RReduce && !LReduce) return true; + } + + if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { + LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses + << " != SU(" << right->NodeNum << "): " << RLiveUses + << "\n"); + return LLiveUses < RLiveUses; + } + + if (!DisableSchedStalls) { + bool LStall = BUHasStall(left, left->getHeight(), SPQ); + bool RStall = BUHasStall(right, right->getHeight(), SPQ); + if (LStall != RStall) + return left->getHeight() > right->getHeight(); + } + + if (!DisableSchedCriticalPath) { + int spread = (int)left->getDepth() - (int)right->getDepth(); + if (std::abs(spread) > MaxReorderWindow) { + LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " + << left->getDepth() << " != SU(" << right->NodeNum + << "): " << right->getDepth() << "\n"); + return left->getDepth() < right->getDepth(); + } + } + + if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { + int spread = (int)left->getHeight() - (int)right->getHeight(); + if (std::abs(spread) > MaxReorderWindow) + return left->getHeight() > right->getHeight(); + } + + return BURRSort(left, right, SPQ); +} + +void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + if (!Disable2AddrHack) + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + if (!TracksRegPressure && !SrcOrder) + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); + + // For single block loops, mark nodes that look like canonical IV increments. 
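+  // An illustrative self-loop body:
+  //   t0 = CopyFromReg %v0
+  //   t1 = add t0, 1
+  //        CopyToReg %v1, t1
+  // Marking the add as a VRegCycle encourages coalescing %v0 with %v1,
+  // removing a copy inside the loop.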
+  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
+    for (SUnit &SU : sunits)
+      initVRegCycle(&SU);
+}
+
+//===----------------------------------------------------------------------===//
+//                    Preschedule for Register Pressure
+//===----------------------------------------------------------------------===//
+
+bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
+  if (SU->isTwoAddress) {
+    unsigned Opc = SU->getNode()->getMachineOpcode();
+    const MCInstrDesc &MCID = TII->get(Opc);
+    unsigned NumRes = MCID.getNumDefs();
+    unsigned NumOps = MCID.getNumOperands() - NumRes;
+    for (unsigned i = 0; i != NumOps; ++i) {
+      if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
+        SDNode *DU = SU->getNode()->getOperand(i).getNode();
+        if (DU->getNodeId() != -1 &&
+            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+/// canClobberReachingPhysRegUse - True if SU would clobber one of its
+/// successors' explicit physregs whose definition can reach DepSU.
+/// i.e. DepSU should not be scheduled above SU.
+static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
+                                         ScheduleDAGRRList *scheduleDAG,
+                                         const TargetInstrInfo *TII,
+                                         const TargetRegisterInfo *TRI) {
+  const MCPhysReg *ImpDefs
+    = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
+  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
+  if (!ImpDefs && !RegMask)
+    return false;
+
+  for (const SDep &Succ : SU->Succs) {
+    SUnit *SuccSU = Succ.getSUnit();
+    for (const SDep &SuccPred : SuccSU->Preds) {
+      if (!SuccPred.isAssignedRegDep())
+        continue;
+
+      if (RegMask &&
+          MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
+          scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
+        return true;
+
+      if (ImpDefs)
+        for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
+          // Return true if SU clobbers this physical register use and the
+          // definition of the register reaches from DepSU. IsReachable queries
+          // a topological forward sort of the DAG (following the successors).
+          if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
+              scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
+            return true;
+    }
+  }
+  return false;
+}
+
+/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
+/// physical register defs.
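+/// For example (illustrative), if SuccSU implicitly defines a flags register
+/// whose value is still used, an SU that also clobbers that register must
+/// not be scheduled between SuccSU and that use.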
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
+                                  const TargetInstrInfo *TII,
+                                  const TargetRegisterInfo *TRI) {
+  SDNode *N = SuccSU->getNode();
+  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+  const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+  assert(ImpDefs && "Caller should check hasPhysRegDefs");
+  for (const SDNode *SUNode = SU->getNode(); SUNode;
+       SUNode = SUNode->getGluedNode()) {
+    if (!SUNode->isMachineOpcode())
+      continue;
+    const MCPhysReg *SUImpDefs =
+        TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
+    const uint32_t *SURegMask = getNodeRegMask(SUNode);
+    if (!SUImpDefs && !SURegMask)
+      continue;
+    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+      MVT VT = N->getSimpleValueType(i);
+      if (VT == MVT::Glue || VT == MVT::Other)
+        continue;
+      if (!N->hasAnyUseOfValue(i))
+        continue;
+      unsigned Reg = ImpDefs[i - NumDefs];
+      if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
+        return true;
+      if (!SUImpDefs)
+        continue;
+      for (; *SUImpDefs; ++SUImpDefs) {
+        unsigned SUReg = *SUImpDefs;
+        if (TRI->regsOverlap(Reg, SUReg))
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
+/// are not handled well by the general register pressure reduction
+/// heuristics. When presented with code like this:
+///
+///      N
+///    / |
+///   /  |
+///  U  store
+///  |
+/// ...
+///
+/// the heuristics tend to push the store up, but since the
+/// operand of the store has another use (U), this would increase
+/// the length of that other use (the U->N edge).
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+///      N
+///      ||
+///      ||
+///     store
+///       |
+///       U
+///       |
+///      ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
+  // Visit all the nodes in topological order, working top-down.
+  for (SUnit &SU : *SUnits) {
+    // For now, only look at nodes with no data successors, such as stores.
+    // These are especially important, due to the heuristics in
+    // getNodePriority for nodes with no data successors.
+    if (SU.NumSuccs != 0)
+      continue;
+    // For now, only look at nodes with exactly one data predecessor.
+    if (SU.NumPreds != 1)
+      continue;
+    // Avoid prescheduling copies to virtual registers, which don't behave
+    // like other nodes from the perspective of scheduling heuristics.
+    if (SDNode *N = SU.getNode())
+      if (N->getOpcode() == ISD::CopyToReg &&
+          Register::isVirtualRegister(
+              cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+        continue;
+
+    SDNode *PredFrameSetup = nullptr;
+    for (const SDep &Pred : SU.Preds)
+      if (Pred.isCtrl() && Pred.getSUnit()) {
+        // Find a control predecessor (one that is not a data dependence).
+        SDNode *PredND = Pred.getSUnit()->getNode();
+
+        // If PredND is a FrameSetup node, do not preschedule this node;
+        // otherwise, during bottom-up scheduling, ADJCALLSTACKDOWN and
+        // ADJCALLSTACKUP may hold the CallResource for too long, preventing
+        // other calls from being scheduled. If no other node is available
+        // to schedule, the scheduler will try to rename the register by
+        // creating a copy to avoid the conflict, which will fail because
+        // CallResource is not a real physical register.
+        if (PredND && PredND->isMachineOpcode() &&
+            (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
+          PredFrameSetup = PredND;
+          break;
+        }
+      }
+    // Skip nodes that have a FrameSetup predecessor.
+    if (PredFrameSetup != nullptr)
+      continue;
+
+    // Locate the single data predecessor.
+    SUnit *PredSU = nullptr;
+    for (const SDep &Pred : SU.Preds)
+      if (!Pred.isCtrl()) {
+        PredSU = Pred.getSUnit();
+        break;
+      }
+    assert(PredSU);
+
+    // Don't rewrite edges that carry physregs, because that requires
+    // additional support infrastructure.
+    if (PredSU->hasPhysRegDefs)
+      continue;
+    // Short-circuit the case where SU is PredSU's only data successor.
+    if (PredSU->NumSuccs == 1)
+      continue;
+    // Avoid prescheduling to copies from virtual registers, which don't behave
+    // like other nodes from the perspective of scheduling heuristics.
+    if (SDNode *N = SU.getNode())
+      if (N->getOpcode() == ISD::CopyFromReg &&
+          Register::isVirtualRegister(
+              cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+        continue;
+
+    // Perform checks on the successors of PredSU.
+    for (const SDep &PredSucc : PredSU->Succs) {
+      SUnit *PredSuccSU = PredSucc.getSUnit();
+      if (PredSuccSU == &SU) continue;
+      // If PredSU has another successor with no data successors, for
+      // now don't attempt to choose either over the other.
+      if (PredSuccSU->NumSuccs == 0)
+        goto outer_loop_continue;
+      // Don't break physical register dependencies.
+      if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+        if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
+          goto outer_loop_continue;
+      // Don't introduce graph cycles.
+      if (scheduleDAG->IsReachable(&SU, PredSuccSU))
+        goto outer_loop_continue;
+    }
+
+    // Ok, the transformation is safe and the heuristics suggest it is
+    // profitable. Update the graph.
+    LLVM_DEBUG(
+        dbgs() << "    Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
+               << PredSU->NodeNum
+               << " to guide scheduling in the presence of multiple uses\n");
+    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+      SDep Edge = PredSU->Succs[i];
+      assert(!Edge.isAssignedRegDep());
+      SUnit *SuccSU = Edge.getSUnit();
+      if (SuccSU != &SU) {
+        Edge.setSUnit(PredSU);
+        scheduleDAG->RemovePred(SuccSU, Edge);
+        scheduleDAG->AddPredQueued(&SU, Edge);
+        Edge.setSUnit(&SU);
+        scheduleDAG->AddPredQueued(SuccSU, Edge);
+        --i;
+      }
+    }
+  outer_loop_continue:;
+  }
+}
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address, but one is commutable while the other is not
+/// commutable, favor the one that's not commutable.
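+///
+/// For example, if t2 = op t1 is two-address (t2 is tied to t1) and t1 has a
+/// second reader U, the artificial edge U->op makes U execute before t1 is
+/// clobbered; without it, the register allocator may need to insert a copy of
+/// t1 to satisfy both uses.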
+void RegReductionPQBase::AddPseudoTwoAddrDeps() {
+  for (SUnit &SU : *SUnits) {
+    if (!SU.isTwoAddress)
+      continue;
+
+    SDNode *Node = SU.getNode();
+    if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
+      continue;
+
+    bool isLiveOut = hasOnlyLiveOutUses(&SU);
+    unsigned Opc = Node->getMachineOpcode();
+    const MCInstrDesc &MCID = TII->get(Opc);
+    unsigned NumRes = MCID.getNumDefs();
+    unsigned NumOps = MCID.getNumOperands() - NumRes;
+    for (unsigned j = 0; j != NumOps; ++j) {
+      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
+        continue;
+      SDNode *DU = SU.getNode()->getOperand(j).getNode();
+      if (DU->getNodeId() == -1)
+        continue;
+      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+      if (!DUSU)
+        continue;
+      for (const SDep &Succ : DUSU->Succs) {
+        if (Succ.isCtrl())
+          continue;
+        SUnit *SuccSU = Succ.getSUnit();
+        if (SuccSU == &SU)
+          continue;
+        // Be conservative. Ignore if nodes aren't at roughly the same
+        // depth and height.
+        if (SuccSU->getHeight() < SU.getHeight() &&
+            (SU.getHeight() - SuccSU->getHeight()) > 1)
+          continue;
+        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+        // constrains whatever is using the copy, instead of the copy
+        // itself. In the case that the copy is coalesced, this
+        // preserves the intent of the pseudo two-address heuristics.
+        while (SuccSU->Succs.size() == 1 &&
+               SuccSU->getNode()->isMachineOpcode() &&
+               SuccSU->getNode()->getMachineOpcode() ==
+                 TargetOpcode::COPY_TO_REGCLASS)
+          SuccSU = SuccSU->Succs.front().getSUnit();
+        // Don't constrain non-instruction nodes.
+        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
+          continue;
+        // Don't constrain nodes with physical register defs if the
+        // predecessor can clobber them.
+        if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
+          if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
+            continue;
+        }
+        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
+        // these may be coalesced away. We want them close to their uses.
+ unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); + if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || + SuccOpc == TargetOpcode::INSERT_SUBREG || + SuccOpc == TargetOpcode::SUBREG_TO_REG) + continue; + if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) && + (!canClobber(SuccSU, DUSU) || + (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || + (!SU.isCommutable && SuccSU->isCommutable)) && + !scheduleDAG->IsReachable(SuccSU, &SU)) { + LLVM_DEBUG(dbgs() + << " Adding a pseudo-two-addr edge from SU #" + << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); + scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial)); + } + } + } + } +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +ScheduleDAGSDNodes * +llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + BURegReductionPriorityQueue *PQ = + new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); + PQ->setScheduleDAG(SD); + return SD; +} + +ScheduleDAGSDNodes * +llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + SrcRegReductionPriorityQueue *PQ = + new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); + PQ->setScheduleDAG(SD); + return SD; +} + +ScheduleDAGSDNodes * +llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const TargetLowering *TLI = IS->TLI; + + HybridBURRPriorityQueue *PQ = + new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); + + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); + PQ->setScheduleDAG(SD); + return SD; +} + +ScheduleDAGSDNodes * +llvm::createILPListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const TargetLowering *TLI = IS->TLI; + + ILPBURRPriorityQueue *PQ = + new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); + PQ->setScheduleDAG(SD); + return SD; +} diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp new file mode 100644 index 0000000000000..d4c1fb36475e7 --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -0,0 +1,1033 @@ +//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+STATISTIC(LoadsClustered, "Number of loads clustered together");
+
+// This allows the latency-based scheduler to notice high latency instructions
+// without a target itinerary. The choice of number here has more to do with
+// balancing scheduler heuristics than with the actual machine latency.
+static cl::opt<int> HighLatencyCycles(
+    "sched-high-latency-cycles", cl::Hidden, cl::init(10),
+    cl::desc("Roughly estimate the number of cycles that 'long latency' "
+             "instructions take for targets with no itinerary"));
+
+ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
+    : ScheduleDAG(mf), BB(nullptr), DAG(nullptr),
+      InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
+  BB = bb;
+  DAG = dag;
+
+  // Clear the scheduler's SUnit DAG.
+  ScheduleDAG::clearDAG();
+  Sequence.clear();
+
+  // Invoke the target's selection of scheduler.
+  Schedule();
+}
+
+/// NewSUnit - Creates a new SUnit and returns a pointer to it.
+/// +SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) { +#ifndef NDEBUG + const SUnit *Addr = nullptr; + if (!SUnits.empty()) + Addr = &SUnits[0]; +#endif + SUnits.emplace_back(N, (unsigned)SUnits.size()); + assert((Addr == nullptr || Addr == &SUnits[0]) && + "SUnits std::vector reallocated on the fly!"); + SUnits.back().OrigNode = &SUnits.back(); + SUnit *SU = &SUnits.back(); + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + if (!N || + (N->isMachineOpcode() && + N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF)) + SU->SchedulingPref = Sched::None; + else + SU->SchedulingPref = TLI.getSchedulingPreference(N); + return SU; +} + +SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { + SUnit *SU = newSUnit(Old->getNode()); + SU->OrigNode = Old->OrigNode; + SU->Latency = Old->Latency; + SU->isVRegCycle = Old->isVRegCycle; + SU->isCall = Old->isCall; + SU->isCallOp = Old->isCallOp; + SU->isTwoAddress = Old->isTwoAddress; + SU->isCommutable = Old->isCommutable; + SU->hasPhysRegDefs = Old->hasPhysRegDefs; + SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; + SU->isScheduleHigh = Old->isScheduleHigh; + SU->isScheduleLow = Old->isScheduleLow; + SU->SchedulingPref = Old->SchedulingPref; + Old->isCloned = true; + return SU; +} + +/// CheckForPhysRegDependency - Check if the dependency between def and use of +/// a specified operand is a physical register dependency. If so, returns the +/// register and the cost of copying the register. +static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, + const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII, + unsigned &PhysReg, int &Cost) { + if (Op != 2 || User->getOpcode() != ISD::CopyToReg) + return; + + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (Register::isVirtualRegister(Reg)) + return; + + unsigned ResNo = User->getOperand(2).getResNo(); + if (Def->getOpcode() == ISD::CopyFromReg && + cast<RegisterSDNode>(Def->getOperand(1))->getReg() == Reg) { + PhysReg = Reg; + } else if (Def->isMachineOpcode()) { + const MCInstrDesc &II = TII->get(Def->getMachineOpcode()); + if (ResNo >= II.getNumDefs() && + II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) + PhysReg = Reg; + } + + if (PhysReg != 0) { + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo)); + Cost = RC->getCopyCost(); + } +} + +// Helper for AddGlue to clone node operands. +static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs, + SDValue ExtraOper = SDValue()) { + SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end()); + if (ExtraOper.getNode()) + Ops.push_back(ExtraOper); + + SDVTList VTList = DAG->getVTList(VTs); + MachineSDNode *MN = dyn_cast<MachineSDNode>(N); + + // Store memory references. + SmallVector<MachineMemOperand *, 2> MMOs; + if (MN) + MMOs.assign(MN->memoperands_begin(), MN->memoperands_end()); + + DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops); + + // Reset the memory references + if (MN) + DAG->setNodeMemRefs(MN, MMOs); +} + +static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { + SDNode *GlueDestNode = Glue.getNode(); + + // Don't add glue from a node to itself. + if (GlueDestNode == N) return false; + + // Don't add a glue operand to something that already uses glue. + if (GlueDestNode && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) { + return false; + } + // Don't add glue to something that already has a glue value. 
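+  // (Glue, when present, is required to be the last result, so checking the
+  // last value type below is sufficient.)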
+  if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
+
+  SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end());
+  if (AddGlue)
+    VTs.push_back(MVT::Glue);
+
+  CloneNodeWithValues(N, DAG, VTs, Glue);
+
+  return true;
+}
+
+// Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
+// node even though simply shrinking the value list is sufficient.
+static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
+  assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue &&
+          !N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
+         "expected an unused glue value");
+
+  CloneNodeWithValues(N, DAG,
+                      makeArrayRef(N->value_begin(), N->getNumValues() - 1));
+}
+
+/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
+/// This function finds loads of the same base and different offsets. If the
+/// offsets are not far apart (target specific), it adds MVT::Glue inputs and
+/// outputs to ensure they are scheduled together and in order. This
+/// optimization may benefit some targets by improving cache locality.
+void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
+  SDNode *Chain = nullptr;
+  unsigned NumOps = Node->getNumOperands();
+  if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+    Chain = Node->getOperand(NumOps-1).getNode();
+  if (!Chain)
+    return;
+
+  // Skip any load instruction that has a tied input. There may be an
+  // additional dependency requiring a different order than by increasing
+  // offsets, and the added glue may introduce a cycle.
+  auto hasTiedInput = [this](const SDNode *N) {
+    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+    for (unsigned I = 0; I != MCID.getNumOperands(); ++I) {
+      if (MCID.getOperandConstraint(I, MCOI::TIED_TO) != -1)
+        return true;
+    }
+
+    return false;
+  };
+
+  // Look for other loads of the same chain. Find loads that are loading from
+  // the same base pointer and different offsets.
+  SmallPtrSet<SDNode*, 16> Visited;
+  SmallVector<int64_t, 4> Offsets;
+  DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
+  bool Cluster = false;
+  SDNode *Base = Node;
+
+  if (hasTiedInput(Base))
+    return;
+
+  // This algorithm requires a reasonably low use count before finding a match
+  // to avoid uselessly blowing up compile time in large blocks.
+  unsigned UseCount = 0;
+  for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
+       I != E && UseCount < 100; ++I, ++UseCount) {
+    SDNode *User = *I;
+    if (User == Node || !Visited.insert(User).second)
+      continue;
+    int64_t Offset1, Offset2;
+    if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
+        Offset1 == Offset2 ||
+        hasTiedInput(User)) {
+      // FIXME: Should be ok if their addresses are identical. But earlier
+      // optimizations really should have eliminated one of the loads.
+      continue;
+    }
+    if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
+      Offsets.push_back(Offset1);
+    O2SMap.insert(std::make_pair(Offset2, User));
+    Offsets.push_back(Offset2);
+    if (Offset2 < Offset1)
+      Base = User;
+    Cluster = true;
+    // Reset UseCount to allow more matches.
+    UseCount = 0;
+  }
+
+  if (!Cluster)
+    return;
+
+  // Sort them in increasing order.
+  llvm::sort(Offsets);
+
+  // Check if the loads are close enough.
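+  // Starting from the lowest offset, greedily grow the cluster while the
+  // target considers each next load near enough; the first load that is too
+  // far away ends the cluster and the remaining loads are ignored.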
+  SmallVector<SDNode*, 4> Loads;
+  unsigned NumLoads = 0;
+  int64_t BaseOff = Offsets[0];
+  SDNode *BaseLoad = O2SMap[BaseOff];
+  Loads.push_back(BaseLoad);
+  for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
+    int64_t Offset = Offsets[i];
+    SDNode *Load = O2SMap[Offset];
+    if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,
+                                      NumLoads))
+      break; // Stop right here. Ignore loads that are further away.
+    Loads.push_back(Load);
+    ++NumLoads;
+  }
+
+  if (NumLoads == 0)
+    return;
+
+  // Cluster loads by adding MVT::Glue outputs and inputs. This also
+  // ensures they are scheduled in order of increasing addresses.
+  SDNode *Lead = Loads[0];
+  SDValue InGlue = SDValue(nullptr, 0);
+  if (AddGlue(Lead, InGlue, true, DAG))
+    InGlue = SDValue(Lead, Lead->getNumValues() - 1);
+  for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
+    bool OutGlue = I < E - 1;
+    SDNode *Load = Loads[I];
+
+    // If AddGlue fails, we could leave an unused glue value. This should not
+    // cause any problems; if the last AddGlue fails, the unused glue output
+    // on the previous node is removed below via RemoveUnusedGlue.
+    if (AddGlue(Load, InGlue, OutGlue, DAG)) {
+      if (OutGlue)
+        InGlue = SDValue(Load, Load->getNumValues() - 1);
+
+      ++LoadsClustered;
+    }
+    else if (!OutGlue && InGlue.getNode())
+      RemoveUnusedGlue(InGlue.getNode(), DAG);
+  }
+}
+
+/// ClusterNodes - Cluster certain nodes which should be scheduled together.
+///
+void ScheduleDAGSDNodes::ClusterNodes() {
+  for (SDNode &NI : DAG->allnodes()) {
+    SDNode *Node = &NI;
+    if (!Node || !Node->isMachineOpcode())
+      continue;
+
+    unsigned Opc = Node->getMachineOpcode();
+    const MCInstrDesc &MCID = TII->get(Opc);
+    if (MCID.mayLoad())
+      // Cluster loads from "near" addresses into combined SUnits.
+      ClusterNeighboringLoads(Node);
+  }
+}
+
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+  // During scheduling, the NodeId field of SDNode is used to map SDNodes
+  // to their associated SUnits by holding SUnits table indices. A value
+  // of -1 means the SDNode does not yet have an associated SUnit.
+  unsigned NumNodes = 0;
+  for (SDNode &NI : DAG->allnodes()) {
+    NI.setNodeId(-1);
+    ++NumNodes;
+  }
+
+  // Reserve entries in the vector for each of the SUnits we are creating. This
+  // ensures that reallocation of the vector won't happen, so SUnit*'s won't
+  // get invalidated.
+  // FIXME: Multiply by 2 because we may clone nodes during scheduling.
+  // This is a temporary workaround.
+  SUnits.reserve(NumNodes * 2);
+
+  // Add all nodes in depth first order.
+  SmallVector<SDNode*, 64> Worklist;
+  SmallPtrSet<SDNode*, 32> Visited;
+  Worklist.push_back(DAG->getRoot().getNode());
+  Visited.insert(DAG->getRoot().getNode());
+
+  SmallVector<SUnit*, 8> CallSUnits;
+  while (!Worklist.empty()) {
+    SDNode *NI = Worklist.pop_back_val();
+
+    // Add all operands to the worklist unless they've already been added.
+    for (const SDValue &Op : NI->op_values())
+      if (Visited.insert(Op.getNode()).second)
+        Worklist.push_back(Op.getNode());
+
+    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
+      continue;
+
+    // If this node has already been processed, stop now.
+    if (NI->getNodeId() != -1) continue;
+
+    SUnit *NodeSUnit = newSUnit(NI);
+
+    // See if anything is glued to this node; if so, add it to the glued
+    // nodes. Nodes can have at most one glue input and one glue output. Glue
+    // is required to be the last operand and result of a node.
+
+    // Scan up to find glued preds.
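+    // Since glue is required to be the last operand, repeatedly follow the
+    // last operand while it has glue type, absorbing each glued predecessor
+    // into this SUnit.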
+ SDNode *N = NI; + while (N->getNumOperands() && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) { + N = N->getOperand(N->getNumOperands()-1).getNode(); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) + NodeSUnit->isCall = true; + } + + // Scan down to find any glued succs. + N = NI; + while (N->getValueType(N->getNumValues()-1) == MVT::Glue) { + SDValue GlueVal(N, N->getNumValues()-1); + + // There are either zero or one users of the Glue result. + bool HasGlueUse = false; + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) + if (GlueVal.isOperandOf(*UI)) { + HasGlueUse = true; + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + N = *UI; + if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) + NodeSUnit->isCall = true; + break; + } + if (!HasGlueUse) break; + } + + if (NodeSUnit->isCall) + CallSUnits.push_back(NodeSUnit); + + // Schedule zero-latency TokenFactor below any nodes that may increase the + // schedule height. Otherwise, ancestors of the TokenFactor may appear to + // have false stalls. + if (NI->getOpcode() == ISD::TokenFactor) + NodeSUnit->isScheduleLow = true; + + // If there are glue operands involved, N is now the bottom-most node + // of the sequence of nodes that are glued together. + // Update the SUnit. + NodeSUnit->setNode(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + + // Compute NumRegDefsLeft. This must be done before AddSchedEdges. + InitNumRegDefsLeft(NodeSUnit); + + // Assign the Latency field of NodeSUnit using target-provided information. + computeLatency(NodeSUnit); + } + + // Find all call operands. + while (!CallSUnits.empty()) { + SUnit *SU = CallSUnits.pop_back_val(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->getOpcode() != ISD::CopyToReg) + continue; + SDNode *SrcN = SUNode->getOperand(2).getNode(); + if (isPassiveNode(SrcN)) continue; // Not scheduled. + SUnit *SrcSU = &SUnits[SrcN->getNodeId()]; + SrcSU->isCallOp = true; + } + } +} + +void ScheduleDAGSDNodes::AddSchedEdges() { + const TargetSubtargetInfo &ST = MF.getSubtarget(); + + // Check to see if the scheduler cares about latencies. + bool UnitLatencies = forceUnitLatencies(); + + // Pass 2: add the preds, succs, etc. + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { + SUnit *SU = &SUnits[su]; + SDNode *MainNode = SU->getNode(); + + if (MainNode->isMachineOpcode()) { + unsigned Opc = MainNode->getMachineOpcode(); + const MCInstrDesc &MCID = TII->get(Opc); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { + SU->isTwoAddress = true; + break; + } + } + if (MCID.isCommutable()) + SU->isCommutable = true; + } + + // Find all predecessors and successors of the group. + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { + if (N->isMachineOpcode() && + TII->get(N->getMachineOpcode()).getImplicitDefs()) { + SU->hasPhysRegClobbers = true; + unsigned NumUsed = InstrEmitter::CountResults(N); + while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) + --NumUsed; // Skip over unused values at the end. 
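+        // A used result index beyond the explicit defs can only be produced
+        // by an implicit physical register def.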
+        if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
+          SU->hasPhysRegDefs = true;
+      }
+
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+        SDNode *OpN = N->getOperand(i).getNode();
+        if (isPassiveNode(OpN)) continue;   // Not scheduled.
+        SUnit *OpSU = &SUnits[OpN->getNodeId()];
+        assert(OpSU && "Node has no SUnit!");
+        if (OpSU == SU) continue;           // In the same group.
+
+        EVT OpVT = N->getOperand(i).getValueType();
+        assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
+        bool isChain = OpVT == MVT::Other;
+
+        unsigned PhysReg = 0;
+        int Cost = 1;
+        // Determine if this is a physical register dependency.
+        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
+        assert((PhysReg == 0 || !isChain) &&
+               "Chain dependence via physreg data?");
+        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, the
+        // scheduler emits a copy from the physical register to a virtual
+        // register unless it requires a cross class copy (cost < 0). That
+        // means we are only treating "expensive to copy" register
+        // dependencies as physical register dependencies. This may change
+        // in the future though.
+        if (Cost >= 0 && !StressSched)
+          PhysReg = 0;
+
+        // If this is a ctrl dep, latency is 1.
+        unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+        // Special-case TokenFactor chains as zero-latency.
+        if (isChain && OpN->getOpcode() == ISD::TokenFactor)
+          OpLatency = 0;
+
+        SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
+                           : SDep(OpSU, SDep::Data, PhysReg);
+        Dep.setLatency(OpLatency);
+        if (!isChain && !UnitLatencies) {
+          computeOperandLatency(OpN, N, i, Dep);
+          ST.adjustSchedDependency(OpSU, SU, Dep);
+        }
+
+        if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
+          // Multiple register uses are combined in the same SUnit. For
+          // example, we could have a set of glued nodes with all their defs
+          // consumed by another set of glued nodes. Register pressure
+          // tracking sees this as a single use, so to keep pressure balanced
+          // we reduce the defs.
+          //
+          // We can't tell (without more book-keeping) if this results from
+          // glued nodes or duplicate operands. As long as we don't reduce
+          // NumRegDefsLeft to zero, we handle the common cases well.
+          --OpSU->NumRegDefsLeft;
+        }
+      }
+    }
+  }
+}
+
+/// BuildSchedGraph - Build the SUnit graph from the selection DAG we are
+/// given. This SUnit graph is similar to the SelectionDAG, but
+/// excludes nodes that aren't interesting to scheduling, and represents
+/// glued-together nodes with a single SUnit.
+void ScheduleDAGSDNodes::BuildSchedGraph(AAResults *AA) {
+  // Cluster certain nodes which should be scheduled together.
+  ClusterNodes();
+  // Populate the SUnits array.
+  BuildSchedUnits();
+  // Compute all the scheduling dependencies between nodes.
+  AddSchedEdges();
+}
+
+// Initialize NumNodeDefs for the current Node's opcode.
+void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+  // Check for phys reg copy.
+  if (!Node)
+    return;
+
+  if (!Node->isMachineOpcode()) {
+    if (Node->getOpcode() == ISD::CopyFromReg)
+      NodeNumDefs = 1;
+    else
+      NodeNumDefs = 0;
+    return;
+  }
+  unsigned POpc = Node->getMachineOpcode();
+  if (POpc == TargetOpcode::IMPLICIT_DEF) {
+    // No register need be allocated for this.
+    NodeNumDefs = 0;
+    return;
+  }
+  if (POpc == TargetOpcode::PATCHPOINT &&
+      Node->getValueType(0) == MVT::Other) {
+    // PATCHPOINT is defined to have one result, but it might really have none
+    // if we're not using CallingConv::AnyReg. Don't mistake the chain for a
+    // real definition.
+    NodeNumDefs = 0;
+    return;
+  }
+  unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
+  // Some instructions define regs that are not represented in the selection
+  // DAG (e.g. unused flags). See tMOVi8. Make sure we don't access past
+  // NumValues.
+  NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
+  DefIdx = 0;
+}
+
+// Construct a RegDefIter for this SUnit and find the first valid value.
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
+                                           const ScheduleDAGSDNodes *SD)
+    : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
+  InitNodeNumDefs();
+  Advance();
+}
+
+// Advance to the next valid value defined by the SUnit.
+void ScheduleDAGSDNodes::RegDefIter::Advance() {
+  for (; Node;) { // Visit all glued nodes.
+    for (; DefIdx < NodeNumDefs; ++DefIdx) {
+      if (!Node->hasAnyUseOfValue(DefIdx))
+        continue;
+      ValueType = Node->getSimpleValueType(DefIdx);
+      ++DefIdx;
+      return; // Found a normal regdef.
+    }
+    Node = Node->getGluedNode();
+    if (!Node) {
+      return; // No values left to visit.
+    }
+    InitNodeNumDefs();
+  }
+}
+
+void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
+  assert(SU->NumRegDefsLeft == 0 && "expect a new node");
+  for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
+    assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
+    ++SU->NumRegDefsLeft;
+  }
+}
+
+void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
+  SDNode *N = SU->getNode();
+
+  // TokenFactor operands are considered zero latency, and some schedulers
+  // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
+  // whenever node latency is nonzero.
+  if (N && N->getOpcode() == ISD::TokenFactor) {
+    SU->Latency = 0;
+    return;
+  }
+
+  // Check to see if the scheduler cares about latencies.
+  if (forceUnitLatencies()) {
+    SU->Latency = 1;
+    return;
+  }
+
+  if (!InstrItins || InstrItins->isEmpty()) {
+    if (N && N->isMachineOpcode() &&
+        TII->isHighLatencyDef(N->getMachineOpcode()))
+      SU->Latency = HighLatencyCycles;
+    else
+      SU->Latency = 1;
+    return;
+  }
+
+  // Compute the latency for the node. We use the sum of the latencies for
+  // all nodes glued together into this SUnit.
+  SU->Latency = 0;
+  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+    if (N->isMachineOpcode())
+      SU->Latency += TII->getInstrLatency(InstrItins, N);
+}
+
+void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
+                                               unsigned OpIdx,
+                                               SDep &dep) const {
+  // Check to see if the scheduler cares about latencies.
+  if (forceUnitLatencies())
+    return;
+
+  if (dep.getKind() != SDep::Data)
+    return;
+
+  unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
+  if (Use->isMachineOpcode())
+    // Adjust the use operand index by num of defs.
+    OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
+  int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+  if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+      !BB->succ_empty()) {
+    unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+    if (Register::isVirtualRegister(Reg))
+      // This copy is a liveout value. It is likely coalesced, so reduce the
+      // latency so as not to penalize the def.
+      // FIXME: need target specific adjustment here?
+      Latency = (Latency > 1) ?
Latency - 1 : 1; + } + if (Latency >= 0) + dep.setLatency(Latency); +} + +void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + dumpNodeName(SU); + dbgs() << ": "; + + if (!SU.getNode()) { + dbgs() << "PHYS REG COPY\n"; + return; + } + + SU.getNode()->dump(DAG); + dbgs() << "\n"; + SmallVector<SDNode *, 4> GluedNodes; + for (SDNode *N = SU.getNode()->getGluedNode(); N; N = N->getGluedNode()) + GluedNodes.push_back(N); + while (!GluedNodes.empty()) { + dbgs() << " "; + GluedNodes.back()->dump(DAG); + dbgs() << "\n"; + GluedNodes.pop_back(); + } +#endif +} + +void ScheduleDAGSDNodes::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + if (EntrySU.getNode() != nullptr) + dumpNodeAll(EntrySU); + for (const SUnit &SU : SUnits) + dumpNodeAll(SU); + if (ExitSU.getNode() != nullptr) + dumpNodeAll(ExitSU); +#endif +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ScheduleDAGSDNodes::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + dumpNode(*SU); + else + dbgs() << "**** NOOP ****\n"; + } +} +#endif + +#ifndef NDEBUG +/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that +/// their state is consistent with the nodes listed in Sequence. +/// +void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { + unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp); + unsigned Noops = 0; + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(Sequence.size() - Noops == ScheduledNodes && + "The number of nodes scheduled doesn't match the expected number!"); +} +#endif // NDEBUG + +/// ProcessSDDbgValues - Process SDDbgValues associated with this node. +static void +ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, + SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders, + DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) { + if (!N->getHasDebugValue()) + return; + + // Opportunistically insert immediate dbg_value uses, i.e. those with the same + // source order number as N. + MachineBasicBlock *BB = Emitter.getBlock(); + MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); + for (auto DV : DAG->GetDbgValues(N)) { + if (DV->isEmitted()) + continue; + unsigned DVOrder = DV->getOrder(); + if (!Order || DVOrder == Order) { + MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap); + if (DbgMI) { + Orders.push_back({DVOrder, DbgMI}); + BB->insert(InsertPos, DbgMI); + } + } + } +} + +// ProcessSourceNode - Process nodes with source order numbers. These are added +// to a vector which EmitSchedule uses to determine how to insert dbg_value +// instructions in the right order. +static void +ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, + DenseMap<SDValue, unsigned> &VRBaseMap, + SmallVectorImpl<std::pair<unsigned, MachineInstr *>> &Orders, + SmallSet<unsigned, 8> &Seen, MachineInstr *NewInsn) { + unsigned Order = N->getIROrder(); + if (!Order || Seen.count(Order)) { + // Process any valid SDDbgValues even if node does not have any order + // assigned. + ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0); + return; + } + + // If a new instruction was generated for this Order number, record it. + // Otherwise, leave this order number unseen: we will either find later + // instructions for it, or leave it unseen if there were no instructions at + // all. 
+  if (NewInsn) {
+    Seen.insert(Order);
+    Orders.push_back({Order, NewInsn});
+  }
+
+  // Even if no instruction was generated, a Value may have become defined via
+  // earlier nodes. Try to process them now.
+  ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
+}
+
+void ScheduleDAGSDNodes::
+EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+                MachineBasicBlock::iterator InsertPos) {
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    if (I->getSUnit()->CopyDstRC) {
+      // Copy to physical register.
+      DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+      assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+      // Find the destination physical register.
+      unsigned Reg = 0;
+      for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+             EE = SU->Succs.end(); II != EE; ++II) {
+        if (II->isCtrl()) continue;  // ignore chain succs
+        if (II->getReg()) {
+          Reg = II->getReg();
+          break;
+        }
+      }
+      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+        .addReg(VRI->second);
+    } else {
+      // Copy from physical register.
+      assert(I->getReg() && "Unknown physical register!");
+      Register VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+      bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+      (void)isNew; // Silence compiler warning.
+      assert(isNew && "Node emitted out of order - early");
+      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
+        .addReg(I->getReg());
+    }
+    break;
+  }
+}
+
+/// EmitSchedule - Emit the machine code in scheduled order. Return the new
+/// InsertPos and MachineBasicBlock that contains this insertion
+/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
+/// not necessarily refer to the returned BB. The emitter may split blocks.
+MachineBasicBlock *ScheduleDAGSDNodes::
+EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
+  InstrEmitter Emitter(BB, InsertPos);
+  DenseMap<SDValue, unsigned> VRBaseMap;
+  DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+  SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
+  SmallSet<unsigned, 8> Seen;
+  bool HasDbg = DAG->hasDebugValues();
+
+  // Emit a node, and determine where its first instruction is for debuginfo.
+  // Zero, one, or multiple instructions can be created when emitting a node.
+  auto EmitNode =
+      [&](SDNode *Node, bool IsClone, bool IsCloned,
+          DenseMap<SDValue, unsigned> &VRBaseMap) -> MachineInstr * {
+    // Fetch the instruction prior to this one, or end() if nonexistent.
+    auto GetPrevInsn = [&](MachineBasicBlock::iterator I) {
+      if (I == BB->begin())
+        return BB->end();
+      else
+        return std::prev(Emitter.getInsertPos());
+    };
+
+    MachineBasicBlock::iterator Before = GetPrevInsn(Emitter.getInsertPos());
+    Emitter.EmitNode(Node, IsClone, IsCloned, VRBaseMap);
+    MachineBasicBlock::iterator After = GetPrevInsn(Emitter.getInsertPos());
+
+    // If the iterator did not change, no instructions were inserted.
+    if (Before == After)
+      return nullptr;
+
+    MachineInstr *MI;
+    if (Before == BB->end()) {
+      // There were no prior instructions; the new ones must start at the
+      // beginning of the block.
+      MI = &Emitter.getBlock()->instr_front();
+    } else {
+      // Return the first instruction after the pre-existing instructions.
+      MI = &*std::next(Before);
+    }
+
+    if (MI->isCall() && DAG->getTarget().Options.EnableDebugEntryValues)
+      MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node));
+
+    return MI;
+  };
+
+  // If this is the first BB, emit byval parameter dbg_value's.
+  if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
+    SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
+    SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
+    for (; PDI != PDE; ++PDI) {
+      MachineInstr *DbgMI = Emitter.EmitDbgValue(*PDI, VRBaseMap);
+      if (DbgMI) {
+        BB->insert(InsertPos, DbgMI);
+        // We re-emit the dbg_value closer to its use, too, after instructions
+        // are emitted to the BB.
+        (*PDI)->clearIsEmitted();
+      }
+    }
+  }
+
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    SUnit *SU = Sequence[i];
+    if (!SU) {
+      // Null SUnit* is a noop.
+      TII->insertNoop(*Emitter.getBlock(), InsertPos);
+      continue;
+    }
+
+    // For pre-regalloc scheduling, create instructions corresponding to the
+    // SDNode and any glued SDNodes and append them to the block.
+    if (!SU->getNode()) {
+      // Emit a copy.
+      EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
+      continue;
+    }
+
+    SmallVector<SDNode *, 4> GluedNodes;
+    for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+      GluedNodes.push_back(N);
+    while (!GluedNodes.empty()) {
+      SDNode *N = GluedNodes.back();
+      auto NewInsn = EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
+      // Remember the source order of the inserted instruction.
+      if (HasDbg)
+        ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
+
+      if (MDNode *MD = DAG->getHeapAllocSite(N)) {
+        if (NewInsn && NewInsn->isCall())
+          MF.addCodeViewHeapAllocSite(NewInsn, MD);
+      }
+
+      GluedNodes.pop_back();
+    }
+    auto NewInsn =
+        EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap);
+    // Remember the source order of the inserted instruction.
+    if (HasDbg)
+      ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
+                        NewInsn);
+    if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) {
+      if (NewInsn && NewInsn->isCall())
+        MF.addCodeViewHeapAllocSite(NewInsn, MD);
+    }
+  }
+
+  // Insert all the dbg_values which have not already been inserted in source
+  // order sequence.
+  if (HasDbg) {
+    MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
+
+    // Sort the source order instructions and use the order to insert debug
+    // values. Use stable_sort so that DBG_VALUEs are inserted in the same
+    // order regardless of the host's implementation of std::sort.
+    llvm::stable_sort(Orders, less_first());
+    std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(),
+                     [](const SDDbgValue *LHS, const SDDbgValue *RHS) {
+                       return LHS->getOrder() < RHS->getOrder();
+                     });
+
+    SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
+    SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
+    // Now emit the rest according to source order.
+    unsigned LastOrder = 0;
+    for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
+      unsigned Order = Orders[i].first;
+      MachineInstr *MI = Orders[i].second;
+      // Insert all SDDbgValue's whose order(s) are before "Order".
+      assert(MI);
+      for (; DI != DE; ++DI) {
+        if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
+          break;
+        if ((*DI)->isEmitted())
+          continue;
+
+        MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
+        if (DbgMI) {
+          if (!LastOrder)
+            // Insert to start of the BB (after PHIs).
+ BB->insert(BBBegin, DbgMI); + else { + // Insert at the instruction, which may be in a different + // block, if the block was split by a custom inserter. + MachineBasicBlock::iterator Pos = MI; + MI->getParent()->insert(Pos, DbgMI); + } + } + } + LastOrder = Order; + } + // Add trailing DbgValue's before the terminator. FIXME: May want to add + // some of them before one or more conditional branches? + SmallVector<MachineInstr*, 8> DbgMIs; + for (; DI != DE; ++DI) { + if ((*DI)->isEmitted()) + continue; + assert((*DI)->getOrder() >= LastOrder && + "emitting DBG_VALUE out of order"); + if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap)) + DbgMIs.push_back(DbgMI); + } + + MachineBasicBlock *InsertBB = Emitter.getBlock(); + MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator(); + InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end()); + + SDDbgInfo::DbgLabelIterator DLI = DAG->DbgLabelBegin(); + SDDbgInfo::DbgLabelIterator DLE = DAG->DbgLabelEnd(); + // Now emit the rest according to source order. + LastOrder = 0; + for (const auto &InstrOrder : Orders) { + unsigned Order = InstrOrder.first; + MachineInstr *MI = InstrOrder.second; + if (!MI) + continue; + + // Insert all SDDbgLabel's whose order(s) are before "Order". + for (; DLI != DLE && + (*DLI)->getOrder() >= LastOrder && (*DLI)->getOrder() < Order; + ++DLI) { + MachineInstr *DbgMI = Emitter.EmitDbgLabel(*DLI); + if (DbgMI) { + if (!LastOrder) + // Insert to start of the BB (after PHIs). + BB->insert(BBBegin, DbgMI); + else { + // Insert at the instruction, which may be in a different + // block, if the block was split by a custom inserter. + MachineBasicBlock::iterator Pos = MI; + MI->getParent()->insert(Pos, DbgMI); + } + } + } + if (DLI == DLE) + break; + + LastOrder = Order; + } + } + + InsertPos = Emitter.getInsertPos(); + return Emitter.getBlock(); +} + +/// Return the basic block label. +std::string ScheduleDAGSDNodes::getDAGName() const { + return "sunit-dag." + BB->getFullName(); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h new file mode 100644 index 0000000000000..183ce4b0652d0 --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -0,0 +1,193 @@ +//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScheduleDAGSDNodes class, which implements +// scheduling for an SDNode-based dependency graph. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H + +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/MachineValueType.h" +#include <cassert> +#include <string> +#include <vector> + +namespace llvm { + +class AAResults; +class InstrItineraryData; + + /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. 
+  ///
+  /// Edges between SUnits are initially based on edges in the SelectionDAG,
+  /// and additional edges can be added by the schedulers as heuristics.
+  /// SDNodes such as Constants, Registers, and a few others that are not
+  /// interesting to schedulers are not allocated SUnits.
+  ///
+  /// SDNodes with MVT::Glue operands are grouped along with the glued
+  /// nodes into a single SUnit so that they are scheduled together.
+  ///
+  /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
+  /// edges. Physical register dependence information is not carried in
+  /// the DAG and must be handled explicitly by schedulers.
+  ///
+  class ScheduleDAGSDNodes : public ScheduleDAG {
+  public:
+    MachineBasicBlock *BB;
+    SelectionDAG *DAG; // DAG of the current basic block
+    const InstrItineraryData *InstrItins;
+
+    /// The schedule. Null SUnit*'s represent noop instructions.
+    std::vector<SUnit*> Sequence;
+
+    explicit ScheduleDAGSDNodes(MachineFunction &mf);
+
+    ~ScheduleDAGSDNodes() override = default;
+
+    /// Run - perform scheduling.
+    ///
+    void Run(SelectionDAG *dag, MachineBasicBlock *bb);
+
+    /// isPassiveNode - Return true if the node is a non-scheduled leaf.
+    ///
+    static bool isPassiveNode(SDNode *Node) {
+      if (isa<ConstantSDNode>(Node)) return true;
+      if (isa<ConstantFPSDNode>(Node)) return true;
+      if (isa<RegisterSDNode>(Node)) return true;
+      if (isa<RegisterMaskSDNode>(Node)) return true;
+      if (isa<GlobalAddressSDNode>(Node)) return true;
+      if (isa<BasicBlockSDNode>(Node)) return true;
+      if (isa<FrameIndexSDNode>(Node)) return true;
+      if (isa<ConstantPoolSDNode>(Node)) return true;
+      if (isa<TargetIndexSDNode>(Node)) return true;
+      if (isa<JumpTableSDNode>(Node)) return true;
+      if (isa<ExternalSymbolSDNode>(Node)) return true;
+      if (isa<MCSymbolSDNode>(Node)) return true;
+      if (isa<BlockAddressSDNode>(Node)) return true;
+      if (Node->getOpcode() == ISD::EntryToken ||
+          isa<MDNodeSDNode>(Node)) return true;
+      return false;
+    }
+
+    /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+    ///
+    SUnit *newSUnit(SDNode *N);
+
+    /// Clone - Creates a clone of the specified SUnit. It does not copy the
+    /// predecessors / successors info nor the temporary scheduling states.
+    ///
+    SUnit *Clone(SUnit *Old);
+
+    /// BuildSchedGraph - Build the SUnit graph from the selection DAG we are
+    /// given. This SUnit graph is similar to the SelectionDAG, but
+    /// excludes nodes that aren't interesting to scheduling, and represents
+    /// glued-together nodes with a single SUnit.
+    void BuildSchedGraph(AAResults *AA);
+
+    /// InitNumRegDefsLeft - Determine the # of regs defined by this node.
+    ///
+    void InitNumRegDefsLeft(SUnit *SU);
+
+    /// computeLatency - Compute node latency.
+    ///
+    virtual void computeLatency(SUnit *SU);
+
+    virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
+                                       unsigned OpIdx, SDep& dep) const;
+
+    /// Schedule - Order nodes according to selected style, filling
+    /// in the Sequence member.
+    ///
+    virtual void Schedule() = 0;
+
+    /// VerifyScheduledSequence - Verify that all SUnits are scheduled and
+    /// consistent with the Sequence of scheduled instructions.
+    void VerifyScheduledSequence(bool isBottomUp);
+
+    /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
+    /// according to the order specified in Sequence.
+    ///
+    virtual MachineBasicBlock*
+    EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
+    void dumpNode(const SUnit &SU) const override;
+    void dump() const override;
+    void dumpSchedule() const;
+
+    std::string getGraphNodeLabel(const SUnit *SU) const override;
+
+    std::string getDAGName() const override;
+
+    virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+
+    /// RegDefIter - In-place iteration over the values defined by an
+    /// SUnit. This does not need copies of the iterator or any other STLisms.
+    /// The iterator creates itself, rather than being provided by the
+    /// SchedDAG.
+    class RegDefIter {
+      const ScheduleDAGSDNodes *SchedDAG;
+      const SDNode *Node;
+      unsigned DefIdx;
+      unsigned NodeNumDefs;
+      MVT ValueType;
+
+    public:
+      RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
+
+      bool IsValid() const { return Node != nullptr; }
+
+      MVT GetValue() const {
+        assert(IsValid() && "bad iterator");
+        return ValueType;
+      }
+
+      const SDNode *GetNode() const {
+        return Node;
+      }
+
+      unsigned GetIdx() const {
+        return DefIdx-1;
+      }
+
+      void Advance();
+
+    private:
+      void InitNodeNumDefs();
+    };
+
+  protected:
+    /// forceUnitLatencies - Return true if all scheduling edges should be
+    /// given a latency value of one. The default is to return false;
+    /// schedulers may override this as needed.
+    virtual bool forceUnitLatencies() const { return false; }
+
+  private:
+    /// ClusterNeighboringLoads - Cluster loads from "near" addresses into
+    /// combined SUnits.
+    void ClusterNeighboringLoads(SDNode *Node);
+    /// ClusterNodes - Cluster certain nodes which should be scheduled
+    /// together.
+    ///
+    void ClusterNodes();
+
+    /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
+    void BuildSchedUnits();
+    void AddSchedEdges();
+
+    void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+                         MachineBasicBlock::iterator InsertPos);
+  };
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
new file mode 100644
index 0000000000000..e7bac73678a76
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -0,0 +1,276 @@
+//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
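+// In those cases the scheduler either advances the cycle (a pipeline stall)
+// or emits an explicit noop, as directed by the target's hazard recognizer.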
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <climits>
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+  VLIWScheduler("vliw-td", "VLIW scheduler",
+                createVLIWDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This
+/// supports top-down scheduling.
+///
+class ScheduleDAGVLIW : public ScheduleDAGSDNodes {
+private:
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  ///
+  SchedulingPriorityQueue *AvailableQueue;
+
+  /// PendingQueue - This contains all of the instructions whose operands have
+  /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation). Once the operands become available, the instruction is
+  /// added to the AvailableQueue.
+  std::vector<SUnit*> PendingQueue;
+
+  /// HazardRec - The hazard recognizer to use.
+  ScheduleHazardRecognizer *HazardRec;
+
+  /// AA - AAResults for making memory reference queries.
+  AAResults *AA;
+
+public:
+  ScheduleDAGVLIW(MachineFunction &mf, AAResults *aa,
+                  SchedulingPriorityQueue *availqueue)
+      : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
+    const TargetSubtargetInfo &STI = mf.getSubtarget();
+    HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
+  }
+
+  ~ScheduleDAGVLIW() override {
+    delete HazardRec;
+    delete AvailableQueue;
+  }
+
+  void Schedule() override;
+
+private:
+  void releaseSucc(SUnit *SU, const SDep &D);
+  void releaseSuccessors(SUnit *SU);
+  void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+  void listScheduleTopDown();
+};
+} // end anonymous namespace
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGVLIW::Schedule() {
+  LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
+                    << " '" << BB->getName() << "' **********\n");
+
+  // Build the scheduling graph.
+  BuildSchedGraph(AA);
+
+  AvailableQueue->initNodes(SUnits);
+
+  listScheduleTopDown();
+
+  AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
+  SUnit *SuccSU = D.getSUnit();
+
+#ifndef NDEBUG
+  if (SuccSU->NumPredsLeft == 0) {
+    dbgs() << "*** Scheduling failed! ***\n";
***\n"; + dumpNode(*SuccSU); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(nullptr); + } +#endif + assert(!D.isWeak() && "unexpected artificial DAG edge"); + + --SuccSU->NumPredsLeft; + + SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { + PendingQueue.push_back(SuccSU); + } +} + +void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) { + // Top down: release successors. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + assert(!I->isAssignedRegDep() && + "The list-td scheduler doesn't yet support physreg dependencies!"); + + releaseSucc(SU, *I); + } +} + +/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + LLVM_DEBUG(dumpNode(*SU)); + + Sequence.push_back(SU); + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + + releaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue->scheduledNode(SU); +} + +/// listScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void ScheduleDAGVLIW::listScheduleTopDown() { + unsigned CurCycle = 0; + + // Release any successors of the special Entry node. + releaseSuccessors(&EntrySU); + + // All leaves to AvailableQueue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // While AvailableQueue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + std::vector<SUnit*> NotReady; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty() || !PendingQueue.empty()) { + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + if (PendingQueue[i]->getDepth() == CurCycle) { + AvailableQueue->push(PendingQueue[i]); + PendingQueue[i]->isAvailable = true; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } + else { + assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); + } + } + + // If there are no instructions available, don't try to issue anything, and + // don't advance the hazard recognizer. + if (AvailableQueue->empty()) { + // Reset DFA state. + AvailableQueue->scheduledNode(nullptr); + ++CurCycle; + continue; + } + + SUnit *FoundSUnit = nullptr; + + bool HasNoopHazards = false; + while (!AvailableQueue->empty()) { + SUnit *CurSUnit = AvailableQueue->pop(); + + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); + if (HT == ScheduleHazardRecognizer::NoHazard) { + FoundSUnit = CurSUnit; + break; + } + + // Remember if this is a noop hazard. + HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; + + NotReady.push_back(CurSUnit); + } + + // Add the nodes that aren't ready back onto the available list. 
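+    // (They were popped above while probing for a hazard-free candidate;
+    // re-pushing keeps them eligible in later cycles.)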
+ if (!NotReady.empty()) { + AvailableQueue->push_all(NotReady); + NotReady.clear(); + } + + // If we found a node to schedule, do it now. + if (FoundSUnit) { + scheduleNodeTopDown(FoundSUnit, CurCycle); + HazardRec->EmitInstruction(FoundSUnit); + + // If this is a pseudo-op node, we don't want to increment the current + // cycle. + if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! + ++CurCycle; + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, just advance + // the current cycle and try again. + LLVM_DEBUG(dbgs() << "*** Advancing cycle, no work to do\n"); + HazardRec->AdvanceCycle(); + ++NumStalls; + ++CurCycle; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. + LLVM_DEBUG(dbgs() << "*** Emitting noop\n"); + HazardRec->EmitNoop(); + Sequence.push_back(nullptr); // NULL here means noop + ++NumNoops; + ++CurCycle; + } + } + +#ifndef NDEBUG + VerifyScheduledSequence(/*isBottomUp=*/false); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +/// createVLIWDAGScheduler - This creates a top-down list scheduler. +ScheduleDAGSDNodes * +llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS)); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp new file mode 100644 index 0000000000000..52a71b91d93f6 --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -0,0 +1,9631 @@ +//===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG class. 
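+// The DAG is a graph of SDNode values on which the legalize and combine
+// phases operate; nodes are uniqued through CSE maps so that structurally
+// identical values are represented only once.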
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "SDNodeDbgValue.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" +#include "llvm/Support/MachineValueType.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <limits> +#include <set> +#include <string> +#include <utility> +#include <vector> + +using namespace llvm; + +/// makeVTList - Return an instance of the SDVTList struct initialized with the +/// specified members. +static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { + SDVTList Res = {VTs, NumVTs}; + return Res; +} + +// Default null implementations of the callbacks. 
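+// Clients can observe DAG mutations by subclassing DAGUpdateListener and
+// overriding any subset of the hooks defaulted below. A minimal sketch, for
+// illustration only (the CountDeletions name is hypothetical, not part of
+// this file):
+//
+//   struct CountDeletions : SelectionDAG::DAGUpdateListener {
+//     unsigned NumDeleted = 0;
+//     CountDeletions(SelectionDAG &DAG) : DAGUpdateListener(DAG) {}
+//     void NodeDeleted(SDNode *, SDNode *) override { ++NumDeleted; }
+//   };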
+void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} +void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} +void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {} + +void SelectionDAG::DAGNodeDeletedListener::anchor() {} + +#define DEBUG_TYPE "selectiondag" + +static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt", + cl::Hidden, cl::init(true), + cl::desc("Gang up loads and stores generated by inlining of memcpy")); + +static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max", + cl::desc("Number limit for gluing ld/st of memcpy."), + cl::Hidden, cl::init(0)); + +static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) { + LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G);); +} + +//===----------------------------------------------------------------------===// +// ConstantFPSDNode Class +//===----------------------------------------------------------------------===// + +/// isExactlyValue - We don't rely on operator== working on double values, as +/// it returns true for things that are clearly not equal, like -0.0 and 0.0. +/// As such, this method can be used to do an exact bit-for-bit comparison of +/// two floating point values. +bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const { + return getValueAPF().bitwiseIsEqual(V); +} + +bool ConstantFPSDNode::isValueValidForType(EVT VT, + const APFloat& Val) { + assert(VT.isFloatingPoint() && "Can only convert between FP types"); + + // convert modifies in place, so make a copy. + APFloat Val2 = APFloat(Val); + bool losesInfo; + (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, + &losesInfo); + return !losesInfo; +} + +//===----------------------------------------------------------------------===// +// ISD Namespace +//===----------------------------------------------------------------------===// + +bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { + auto *BV = dyn_cast<BuildVectorSDNode>(N); + if (!BV) + return false; + + APInt SplatUndef; + unsigned SplatBitSize; + bool HasUndefs; + unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); + return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs, + EltSize) && + EltSize == SplatBitSize; +} + +// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be +// specializations of the more general isConstantSplatVector()? + +bool ISD::isBuildVectorAllOnes(const SDNode *N) { + // Look through a bit convert. + while (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0).getNode(); + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + unsigned i = 0, e = N->getNumOperands(); + + // Skip over all of the undef values. + while (i != e && N->getOperand(i).isUndef()) + ++i; + + // Do not accept an all-undef vector. + if (i == e) return false; + + // Do not accept build_vectors that aren't all constants or which have non-~0 + // elements. We have to be a bit careful here, as the type of the constant + // may not be the same as the type of the vector elements due to type + // legalization (the elements are promoted to a legal type for the target and + // a vector of a type may be legal when the base element type is not). + // We only want to check enough bits to cover the vector elements, because + // we care if the resultant vector is all ones, not whether the individual + // constants are. 
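+  // For example, with a v4i8 build_vector whose operands were promoted to
+  // i32, both i32 0xFFFFFFFF and i32 0x000000FF count as all-ones lanes:
+  // each has at least 8 trailing one bits, which is all the check below
+  // requires.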
+ SDValue NotZero = N->getOperand(i); + unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) { + if (CN->getAPIntValue().countTrailingOnes() < EltSize) + return false; + } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) { + if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize) + return false; + } else + return false; + + // Okay, we have at least one ~0 value, check to see if the rest match or are + // undefs. Even with the above element type twiddling, this should be OK, as + // the same type legalization should have applied to all the elements. + for (++i; i != e; ++i) + if (N->getOperand(i) != NotZero && !N->getOperand(i).isUndef()) + return false; + return true; +} + +bool ISD::isBuildVectorAllZeros(const SDNode *N) { + // Look through a bit convert. + while (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0).getNode(); + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + bool IsAllUndef = true; + for (const SDValue &Op : N->op_values()) { + if (Op.isUndef()) + continue; + IsAllUndef = false; + // Do not accept build_vectors that aren't all constants or which have non-0 + // elements. We have to be a bit careful here, as the type of the constant + // may not be the same as the type of the vector elements due to type + // legalization (the elements are promoted to a legal type for the target + // and a vector of a type may be legal when the base element type is not). + // We only want to check enough bits to cover the vector elements, because + // we care if the resultant vector is all zeros, not whether the individual + // constants are. + unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) { + if (CN->getAPIntValue().countTrailingZeros() < EltSize) + return false; + } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Op)) { + if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize) + return false; + } else + return false; + } + + // Do not accept an all-undef vector. + if (IsAllUndef) + return false; + return true; +} + +bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + for (const SDValue &Op : N->op_values()) { + if (Op.isUndef()) + continue; + if (!isa<ConstantSDNode>(Op)) + return false; + } + return true; +} + +bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + for (const SDValue &Op : N->op_values()) { + if (Op.isUndef()) + continue; + if (!isa<ConstantFPSDNode>(Op)) + return false; + } + return true; +} + +bool ISD::allOperandsUndef(const SDNode *N) { + // Return false if the node has no operands. + // This is "logically inconsistent" with the definition of "all" but + // is probably the desired behavior. + if (N->getNumOperands() == 0) + return false; + return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); }); +} + +bool ISD::matchUnaryPredicate(SDValue Op, + std::function<bool(ConstantSDNode *)> Match, + bool AllowUndefs) { + // FIXME: Add support for scalar UNDEF cases? + if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) + return Match(Cst); + + // FIXME: Add support for vector UNDEF cases? 
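+  // Illustrative use (hypothetical caller, not defined here): test that
+  // every constant lane is a power of two with
+  //   ISD::matchUnaryPredicate(Op, [](ConstantSDNode *C) {
+  //     return C->getAPIntValue().isPowerOf2();
+  //   });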
+ if (ISD::BUILD_VECTOR != Op.getOpcode()) + return false; + + EVT SVT = Op.getValueType().getScalarType(); + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + if (AllowUndefs && Op.getOperand(i).isUndef()) { + if (!Match(nullptr)) + return false; + continue; + } + + auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i)); + if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst)) + return false; + } + return true; +} + +bool ISD::matchBinaryPredicate( + SDValue LHS, SDValue RHS, + std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match, + bool AllowUndefs, bool AllowTypeMismatch) { + if (!AllowTypeMismatch && LHS.getValueType() != RHS.getValueType()) + return false; + + // TODO: Add support for scalar UNDEF cases? + if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS)) + if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS)) + return Match(LHSCst, RHSCst); + + // TODO: Add support for vector UNDEF cases? + if (ISD::BUILD_VECTOR != LHS.getOpcode() || + ISD::BUILD_VECTOR != RHS.getOpcode()) + return false; + + EVT SVT = LHS.getValueType().getScalarType(); + for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { + SDValue LHSOp = LHS.getOperand(i); + SDValue RHSOp = RHS.getOperand(i); + bool LHSUndef = AllowUndefs && LHSOp.isUndef(); + bool RHSUndef = AllowUndefs && RHSOp.isUndef(); + auto *LHSCst = dyn_cast<ConstantSDNode>(LHSOp); + auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp); + if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef)) + return false; + if (!AllowTypeMismatch && (LHSOp.getValueType() != SVT || + LHSOp.getValueType() != RHSOp.getValueType())) + return false; + if (!Match(LHSCst, RHSCst)) + return false; + } + return true; +} + +ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) { + switch (ExtType) { + case ISD::EXTLOAD: + return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND; + case ISD::SEXTLOAD: + return ISD::SIGN_EXTEND; + case ISD::ZEXTLOAD: + return ISD::ZERO_EXTEND; + default: + break; + } + + llvm_unreachable("Invalid LoadExtType"); +} + +ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { + // To perform this operation, we just need to swap the L and G bits of the + // operation. + unsigned OldL = (Operation >> 2) & 1; + unsigned OldG = (Operation >> 1) & 1; + return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits + (OldL << 1) | // New G bit + (OldG << 2)); // New L bit. +} + +ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { + unsigned Operation = Op; + if (isInteger) + Operation ^= 7; // Flip L, G, E bits, but not U. + else + Operation ^= 15; // Flip all of the condition bits. + + if (Operation > ISD::SETTRUE2) + Operation &= ~8; // Don't let N and U bits get set. + + return ISD::CondCode(Operation); +} + +/// For an integer comparison, return 1 if the comparison is a signed operation +/// and 2 if the result is an unsigned comparison. Return zero if the operation +/// does not depend on the sign of the input (setne and seteq). 
+static int isSignedOp(ISD::CondCode Opcode) { + switch (Opcode) { + default: llvm_unreachable("Illegal integer setcc operation!"); + case ISD::SETEQ: + case ISD::SETNE: return 0; + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETGT: + case ISD::SETGE: return 1; + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUGT: + case ISD::SETUGE: return 2; + } +} + +ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, + bool IsInteger) { + if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) + // Cannot fold a signed integer setcc with an unsigned integer setcc. + return ISD::SETCC_INVALID; + + unsigned Op = Op1 | Op2; // Combine all of the condition bits. + + // If the N and U bits get set, then the resultant comparison DOES suddenly + // care about orderedness, and it is true when ordered. + if (Op > ISD::SETTRUE2) + Op &= ~16; // Clear the U bit if the N bit is set. + + // Canonicalize illegal integer setcc's. + if (IsInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT + Op = ISD::SETNE; + + return ISD::CondCode(Op); +} + +ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, + bool IsInteger) { + if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) + // Cannot fold a signed setcc with an unsigned setcc. + return ISD::SETCC_INVALID; + + // Combine all of the condition bits. + ISD::CondCode Result = ISD::CondCode(Op1 & Op2); + + // Canonicalize illegal integer setcc's. + if (IsInteger) { + switch (Result) { + default: break; + case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT + case ISD::SETOEQ: // SETEQ & SETU[LG]E + case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE + case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE + case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE + } + } + + return Result; +} + +//===----------------------------------------------------------------------===// +// SDNode Profile Support +//===----------------------------------------------------------------------===// + +/// AddNodeIDOpcode - Add the node opcode to the NodeID data. +static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) { + ID.AddInteger(OpC); +} + +/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them +/// solely with their pointer. +static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { + ID.AddPointer(VTList.VTs); +} + +/// AddNodeIDOperands - Various routines for adding operands to the NodeID data. +static void AddNodeIDOperands(FoldingSetNodeID &ID, + ArrayRef<SDValue> Ops) { + for (auto& Op : Ops) { + ID.AddPointer(Op.getNode()); + ID.AddInteger(Op.getResNo()); + } +} + +/// AddNodeIDOperands - Various routines for adding operands to the NodeID data. +static void AddNodeIDOperands(FoldingSetNodeID &ID, + ArrayRef<SDUse> Ops) { + for (auto& Op : Ops) { + ID.AddPointer(Op.getNode()); + ID.AddInteger(Op.getResNo()); + } +} + +static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC, + SDVTList VTList, ArrayRef<SDValue> OpList) { + AddNodeIDOpcode(ID, OpC); + AddNodeIDValueTypes(ID, VTList); + AddNodeIDOperands(ID, OpList); +} + +/// If this is an SDNode with special info, add this info to the NodeID data. +static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { + switch (N->getOpcode()) { + case ISD::TargetExternalSymbol: + case ISD::ExternalSymbol: + case ISD::MCSymbol: + llvm_unreachable("Should only be used on nodes with operands"); + default: break; // Normal nodes don't need extra info. 
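+  // Each case below folds the node's distinguishing payload into the ID so
+  // that, e.g., two Constant nodes with different values, or two loads from
+  // different address spaces, never unify in the CSE map.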
+ case ISD::TargetConstant: + case ISD::Constant: { + const ConstantSDNode *C = cast<ConstantSDNode>(N); + ID.AddPointer(C->getConstantIntValue()); + ID.AddBoolean(C->isOpaque()); + break; + } + case ISD::TargetConstantFP: + case ISD::ConstantFP: + ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue()); + break; + case ISD::TargetGlobalAddress: + case ISD::GlobalAddress: + case ISD::TargetGlobalTLSAddress: + case ISD::GlobalTLSAddress: { + const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); + ID.AddPointer(GA->getGlobal()); + ID.AddInteger(GA->getOffset()); + ID.AddInteger(GA->getTargetFlags()); + break; + } + case ISD::BasicBlock: + ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock()); + break; + case ISD::Register: + ID.AddInteger(cast<RegisterSDNode>(N)->getReg()); + break; + case ISD::RegisterMask: + ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask()); + break; + case ISD::SRCVALUE: + ID.AddPointer(cast<SrcValueSDNode>(N)->getValue()); + break; + case ISD::FrameIndex: + case ISD::TargetFrameIndex: + ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex()); + break; + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: + if (cast<LifetimeSDNode>(N)->hasOffset()) { + ID.AddInteger(cast<LifetimeSDNode>(N)->getSize()); + ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset()); + } + break; + case ISD::JumpTable: + case ISD::TargetJumpTable: + ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex()); + ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags()); + break; + case ISD::ConstantPool: + case ISD::TargetConstantPool: { + const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N); + ID.AddInteger(CP->getAlignment()); + ID.AddInteger(CP->getOffset()); + if (CP->isMachineConstantPoolEntry()) + CP->getMachineCPVal()->addSelectionDAGCSEId(ID); + else + ID.AddPointer(CP->getConstVal()); + ID.AddInteger(CP->getTargetFlags()); + break; + } + case ISD::TargetIndex: { + const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N); + ID.AddInteger(TI->getIndex()); + ID.AddInteger(TI->getOffset()); + ID.AddInteger(TI->getTargetFlags()); + break; + } + case ISD::LOAD: { + const LoadSDNode *LD = cast<LoadSDNode>(N); + ID.AddInteger(LD->getMemoryVT().getRawBits()); + ID.AddInteger(LD->getRawSubclassData()); + ID.AddInteger(LD->getPointerInfo().getAddrSpace()); + break; + } + case ISD::STORE: { + const StoreSDNode *ST = cast<StoreSDNode>(N); + ID.AddInteger(ST->getMemoryVT().getRawBits()); + ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); + break; + } + case ISD::MLOAD: { + const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N); + ID.AddInteger(MLD->getMemoryVT().getRawBits()); + ID.AddInteger(MLD->getRawSubclassData()); + ID.AddInteger(MLD->getPointerInfo().getAddrSpace()); + break; + } + case ISD::MSTORE: { + const MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); + ID.AddInteger(MST->getMemoryVT().getRawBits()); + ID.AddInteger(MST->getRawSubclassData()); + ID.AddInteger(MST->getPointerInfo().getAddrSpace()); + break; + } + case ISD::MGATHER: { + const MaskedGatherSDNode *MG = cast<MaskedGatherSDNode>(N); + ID.AddInteger(MG->getMemoryVT().getRawBits()); + ID.AddInteger(MG->getRawSubclassData()); + ID.AddInteger(MG->getPointerInfo().getAddrSpace()); + break; + } + case ISD::MSCATTER: { + const MaskedScatterSDNode *MS = cast<MaskedScatterSDNode>(N); + ID.AddInteger(MS->getMemoryVT().getRawBits()); + ID.AddInteger(MS->getRawSubclassData()); + ID.AddInteger(MS->getPointerInfo().getAddrSpace()); + break; + } + case ISD::ATOMIC_CMP_SWAP: + 
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+  case ISD::ATOMIC_SWAP:
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_CLR:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX:
+  case ISD::ATOMIC_LOAD:
+  case ISD::ATOMIC_STORE: {
+    const AtomicSDNode *AT = cast<AtomicSDNode>(N);
+    ID.AddInteger(AT->getMemoryVT().getRawBits());
+    ID.AddInteger(AT->getRawSubclassData());
+    ID.AddInteger(AT->getPointerInfo().getAddrSpace());
+    break;
+  }
+  case ISD::PREFETCH: {
+    const MemSDNode *PF = cast<MemSDNode>(N);
+    ID.AddInteger(PF->getPointerInfo().getAddrSpace());
+    break;
+  }
+  case ISD::VECTOR_SHUFFLE: {
+    const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+    for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
+         i != e; ++i)
+      ID.AddInteger(SVN->getMaskElt(i));
+    break;
+  }
+  case ISD::TargetBlockAddress:
+  case ISD::BlockAddress: {
+    const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N);
+    ID.AddPointer(BA->getBlockAddress());
+    ID.AddInteger(BA->getOffset());
+    ID.AddInteger(BA->getTargetFlags());
+    break;
+  }
+  } // end switch (N->getOpcode())
+
+  // Target specific memory nodes could also have address spaces to check.
+  if (N->isTargetMemoryOpcode())
+    ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
+  AddNodeIDOpcode(ID, N->getOpcode());
+  // Add the return value info.
+  AddNodeIDValueTypes(ID, N->getVTList());
+  // Add the operand info.
+  AddNodeIDOperands(ID, N->ops());
+
+  // Handle SDNode leaves with special info.
+  AddNodeIDCustom(ID, N);
+}
+
+//===----------------------------------------------------------------------===//
+//                              SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// doNotCSE - Return true if CSE should not be performed for this node.
+static bool doNotCSE(SDNode *N) {
+  if (N->getValueType(0) == MVT::Glue)
+    return true; // Never CSE anything that produces a flag.
+
+  switch (N->getOpcode()) {
+  default: break;
+  case ISD::HANDLENODE:
+  case ISD::EH_LABEL:
+    return true; // Never CSE these nodes.
+  }
+
+  // Check that remaining values produced are not flags.
+  for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+    if (N->getValueType(i) == MVT::Glue)
+      return true; // Never CSE anything that produces a flag.
+
+  return false;
+}
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+  // Create a dummy node (which is not added to allnodes) that adds a reference
+  // to the root node, preventing it from being deleted.
+  HandleSDNode Dummy(getRoot());
+
+  SmallVector<SDNode*, 128> DeadNodes;
+
+  // Add all obviously-dead nodes to the DeadNodes worklist.
+  for (SDNode &Node : allnodes())
+    if (Node.use_empty())
+      DeadNodes.push_back(&Node);
+
+  RemoveDeadNodes(DeadNodes);
+
+  // If the root changed (e.g. it was a dead load), update the root.
+  setRoot(Dummy.getValue());
+}
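+// Worked example of the worklist algorithm below: deleting a dead
+// (add (mul a, b), c) first deletes the ADD and drops its operand uses; if
+// that leaves the MUL with no remaining uses, the MUL is pushed and deleted
+// in a later iteration, while 'a', 'b' and 'c' survive as long as they have
+// other users.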
+/// RemoveDeadNodes - This method deletes the unreachable nodes in the
+/// given list, and any nodes that become unreachable as a result.
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {
+
+  // Process the worklist, deleting the nodes and adding their uses to the
+  // worklist.
+  while (!DeadNodes.empty()) {
+    SDNode *N = DeadNodes.pop_back_val();
+    // Skip to next node if we've already managed to delete the node. This
+    // could happen if replacing a node causes a node previously added to the
+    // worklist to be deleted.
+    if (N->getOpcode() == ISD::DELETED_NODE)
+      continue;
+
+    for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+      DUL->NodeDeleted(N, nullptr);
+
+    // Take the node out of the appropriate CSE map.
+    RemoveNodeFromCSEMaps(N);
+
+    // Next, brutally remove the operand list. This is safe to do, as there are
+    // no cycles in the graph.
+    for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+      SDUse &Use = *I++;
+      SDNode *Operand = Use.getNode();
+      Use.set(SDValue());
+
+      // Now that we removed this operand, see if there are no uses of it left.
+      if (Operand->use_empty())
+        DeadNodes.push_back(Operand);
+    }
+
+    DeallocateNode(N);
+  }
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N) {
+  SmallVector<SDNode*, 16> DeadNodes(1, N);
+
+  // Create a dummy node that adds a reference to the root node, preventing
+  // it from being deleted. (This matters if the root is an operand of the
+  // dead node.)
+  HandleSDNode Dummy(getRoot());
+
+  RemoveDeadNodes(DeadNodes);
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+  // First take this out of the appropriate CSE map.
+  RemoveNodeFromCSEMaps(N);
+
+  // Finally, remove uses due to operands of this node, remove from the
+  // AllNodes list, and delete the node.
+  DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+  assert(N->getIterator() != AllNodes.begin() &&
+         "Cannot delete the entry node!");
+  assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+  // Drop all of the operands and decrement used node's use counts.
+  N->DropOperands();
+
+  DeallocateNode(N);
+}
+
+void SDDbgInfo::erase(const SDNode *Node) {
+  DbgValMapType::iterator I = DbgValMap.find(Node);
+  if (I == DbgValMap.end())
+    return;
+  for (auto &Val : I->second)
+    Val->setIsInvalidated();
+  DbgValMap.erase(I);
+}
+
+void SelectionDAG::DeallocateNode(SDNode *N) {
+  // If we have operands, deallocate them.
+  removeOperands(N);
+
+  NodeAllocator.Deallocate(AllNodes.remove(N));
+
+  // Set the opcode to DELETED_NODE to help catch bugs when node
+  // memory is reallocated.
+  // FIXME: There are places in SDag that have grown a dependency on the opcode
+  // value in the released node.
+  __asan_unpoison_memory_region(&N->NodeType, sizeof(N->NodeType));
+  N->NodeType = ISD::DELETED_NODE;
+
+  // If any of the SDDbgValue nodes refer to this SDNode, invalidate
+  // them and forget about that node.
+  DbgInfo->erase(N);
+}
+
+#ifndef NDEBUG
+/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
+static void VerifySDNode(SDNode *N) { + switch (N->getOpcode()) { + default: + break; + case ISD::BUILD_PAIR: { + EVT VT = N->getValueType(0); + assert(N->getNumValues() == 1 && "Too many results!"); + assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && + "Wrong return type!"); + assert(N->getNumOperands() == 2 && "Wrong number of operands!"); + assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && + "Mismatched operand types!"); + assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && + "Wrong operand type!"); + assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && + "Wrong return type size"); + break; + } + case ISD::BUILD_VECTOR: { + assert(N->getNumValues() == 1 && "Too many results!"); + assert(N->getValueType(0).isVector() && "Wrong return type!"); + assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && + "Wrong number of operands!"); + EVT EltVT = N->getValueType(0).getVectorElementType(); + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) { + assert((I->getValueType() == EltVT || + (EltVT.isInteger() && I->getValueType().isInteger() && + EltVT.bitsLE(I->getValueType()))) && + "Wrong operand type!"); + assert(I->getValueType() == N->getOperand(0).getValueType() && + "Operands must all have the same type"); + } + break; + } + } +} +#endif // NDEBUG + +/// Insert a newly allocated node into the DAG. +/// +/// Handles insertion into the all nodes list and CSE map, as well as +/// verification and other common operations when a new node is allocated. +void SelectionDAG::InsertNode(SDNode *N) { + AllNodes.push_back(N); +#ifndef NDEBUG + N->PersistentId = NextPersistentId++; + VerifySDNode(N); +#endif + for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) + DUL->NodeInserted(N); +} + +/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that +/// correspond to it. This is useful when we're about to delete or repurpose +/// the node. We don't want future request for structurally identical nodes +/// to return N anymore. +bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { + bool Erased = false; + switch (N->getOpcode()) { + case ISD::HANDLENODE: return false; // noop. + case ISD::CONDCODE: + assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] && + "Cond code doesn't exist!"); + Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != nullptr; + CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = nullptr; + break; + case ISD::ExternalSymbol: + Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol()); + break; + case ISD::TargetExternalSymbol: { + ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N); + Erased = TargetExternalSymbols.erase(std::pair<std::string, unsigned>( + ESN->getSymbol(), ESN->getTargetFlags())); + break; + } + case ISD::MCSymbol: { + auto *MCSN = cast<MCSymbolSDNode>(N); + Erased = MCSymbols.erase(MCSN->getMCSymbol()); + break; + } + case ISD::VALUETYPE: { + EVT VT = cast<VTSDNode>(N)->getVT(); + if (VT.isExtended()) { + Erased = ExtendedValueTypeNodes.erase(VT); + } else { + Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != nullptr; + ValueTypeNodes[VT.getSimpleVT().SimpleTy] = nullptr; + } + break; + } + default: + // Remove it from the CSE Map. 
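+    // (Everything that is CSE'd and has no dedicated side table -- arithmetic,
+    // loads, stores, and so on -- lives in the main CSEMap, keyed by the
+    // FoldingSetNodeID built in AddNodeIDNode.)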
+ assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!"); + assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!"); + Erased = CSEMap.RemoveNode(N); + break; + } +#ifndef NDEBUG + // Verify that the node was actually in one of the CSE maps, unless it has a + // flag result (which cannot be CSE'd) or is one of the special cases that are + // not subject to CSE. + if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue && + !N->isMachineOpcode() && !doNotCSE(N)) { + N->dump(this); + dbgs() << "\n"; + llvm_unreachable("Node is not in map!"); + } +#endif + return Erased; +} + +/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE +/// maps and modified in place. Add it back to the CSE maps, unless an identical +/// node already exists, in which case transfer all its users to the existing +/// node. This transfer can potentially trigger recursive merging. +void +SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) { + // For node types that aren't CSE'd, just act as if no identical node + // already exists. + if (!doNotCSE(N)) { + SDNode *Existing = CSEMap.GetOrInsertNode(N); + if (Existing != N) { + // If there was already an existing matching node, use ReplaceAllUsesWith + // to replace the dead one with the existing one. This can cause + // recursive merging of other unrelated nodes down the line. + ReplaceAllUsesWith(N, Existing); + + // N is now dead. Inform the listeners and delete it. + for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) + DUL->NodeDeleted(N, Existing); + DeleteNodeNotInCSEMaps(N); + return; + } + } + + // If the node doesn't already exist, we updated it. Inform listeners. + for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) + DUL->NodeUpdated(N); +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, + void *&InsertPos) { + if (doNotCSE(N)) + return nullptr; + + SDValue Ops[] = { Op }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); + AddNodeIDCustom(ID, N); + SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); + if (Node) + Node->intersectFlagsWith(N->getFlags()); + return Node; +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + SDValue Op1, SDValue Op2, + void *&InsertPos) { + if (doNotCSE(N)) + return nullptr; + + SDValue Ops[] = { Op1, Op2 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); + AddNodeIDCustom(ID, N); + SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); + if (Node) + Node->intersectFlagsWith(N->getFlags()); + return Node; +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. 
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, + void *&InsertPos) { + if (doNotCSE(N)) + return nullptr; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); + AddNodeIDCustom(ID, N); + SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); + if (Node) + Node->intersectFlagsWith(N->getFlags()); + return Node; +} + +unsigned SelectionDAG::getEVTAlignment(EVT VT) const { + Type *Ty = VT == MVT::iPTR ? + PointerType::get(Type::getInt8Ty(*getContext()), 0) : + VT.getTypeForEVT(*getContext()); + + return getDataLayout().getABITypeAlignment(Ty); +} + +// EntryNode could meaningfully have debug info if we can find it... +SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) + : TM(tm), OptLevel(OL), + EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), + Root(getEntryNode()) { + InsertNode(&EntryNode); + DbgInfo = new SDDbgInfo(); +} + +void SelectionDAG::init(MachineFunction &NewMF, + OptimizationRemarkEmitter &NewORE, + Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, + LegacyDivergenceAnalysis * Divergence) { + MF = &NewMF; + SDAGISelPass = PassPtr; + ORE = &NewORE; + TLI = getSubtarget().getTargetLowering(); + TSI = getSubtarget().getSelectionDAGInfo(); + LibInfo = LibraryInfo; + Context = &MF->getFunction().getContext(); + DA = Divergence; +} + +SelectionDAG::~SelectionDAG() { + assert(!UpdateListeners && "Dangling registered DAGUpdateListeners"); + allnodes_clear(); + OperandRecycler.clear(OperandAllocator); + delete DbgInfo; +} + +void SelectionDAG::allnodes_clear() { + assert(&*AllNodes.begin() == &EntryNode); + AllNodes.remove(AllNodes.begin()); + while (!AllNodes.empty()) + DeallocateNode(&AllNodes.front()); +#ifndef NDEBUG + NextPersistentId = 0; +#endif +} + +SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, + void *&InsertPos) { + SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (N) { + switch (N->getOpcode()) { + default: break; + case ISD::Constant: + case ISD::ConstantFP: + llvm_unreachable("Querying for Constant and ConstantFP nodes requires " + "debug location. Use another overload."); + } + } + return N; +} + +SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, + const SDLoc &DL, void *&InsertPos) { + SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (N) { + switch (N->getOpcode()) { + case ISD::Constant: + case ISD::ConstantFP: + // Erase debug location from the node if the node is used at several + // different places. Do not propagate one location to all uses as it + // will cause a worse single stepping debugging experience. + if (N->getDebugLoc() != DL.getDebugLoc()) + N->setDebugLoc(DebugLoc()); + break; + default: + // When the node's point of use is located earlier in the instruction + // sequence than its prior point of use, update its debug info to the + // earlier location. 
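+      // (For example, if a node created for IR order 12 is found again from
+      // IR order 7, it is re-tagged with the earlier location so that single
+      // stepping does not appear to jump backwards.)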
+ if (DL.getIROrder() && DL.getIROrder() < N->getIROrder()) + N->setDebugLoc(DL.getDebugLoc()); + break; + } + } + return N; +} + +void SelectionDAG::clear() { + allnodes_clear(); + OperandRecycler.clear(OperandAllocator); + OperandAllocator.Reset(); + CSEMap.clear(); + + ExtendedValueTypeNodes.clear(); + ExternalSymbols.clear(); + TargetExternalSymbols.clear(); + MCSymbols.clear(); + SDCallSiteDbgInfo.clear(); + std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), + static_cast<CondCodeSDNode*>(nullptr)); + std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), + static_cast<SDNode*>(nullptr)); + + EntryNode.UseList = nullptr; + InsertNode(&EntryNode); + Root = getEntryNode(); + DbgInfo->clear(); +} + +SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) + ? getNode(ISD::FP_EXTEND, DL, VT, Op) + : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL)); +} + +SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::ANY_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::SIGN_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::ZERO_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, + EVT OpVT) { + if (VT.bitsLE(Op.getValueType())) + return getNode(ISD::TRUNCATE, SL, VT, Op); + + TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT); + return getNode(TLI->getExtendForContent(BType), SL, VT, Op); +} + +SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { + assert(!VT.isVector() && + "getZeroExtendInReg should use the vector element type instead of " + "the vector type!"); + if (Op.getValueType().getScalarType() == VT) return Op; + unsigned BitWidth = Op.getScalarValueSizeInBits(); + APInt Imm = APInt::getLowBitsSet(BitWidth, + VT.getSizeInBits()); + return getNode(ISD::AND, DL, Op.getValueType(), Op, + getConstant(Imm, DL, Op.getValueType())); +} + +SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { + // Only unsigned pointer semantics are supported right now. In the future this + // might delegate to TLI to check pointer signedness. + return getZExtOrTrunc(Op, DL, VT); +} + +SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { + // Only unsigned pointer semantics are supported right now. In the future this + // might delegate to TLI to check pointer signedness. + return getZeroExtendInReg(Op, DL, VT); +} + +/// getNOT - Create a bitwise NOT operation as (XOR Val, -1). 
+SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { + EVT EltVT = VT.getScalarType(); + SDValue NegOne = + getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT); + return getNode(ISD::XOR, DL, VT, Val, NegOne); +} + +SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) { + SDValue TrueValue = getBoolConstant(true, DL, VT, VT); + return getNode(ISD::XOR, DL, VT, Val, TrueValue); +} + +SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT, + EVT OpVT) { + if (!V) + return getConstant(0, DL, VT); + + switch (TLI->getBooleanContents(OpVT)) { + case TargetLowering::ZeroOrOneBooleanContent: + case TargetLowering::UndefinedBooleanContent: + return getConstant(1, DL, VT); + case TargetLowering::ZeroOrNegativeOneBooleanContent: + return getAllOnesConstant(DL, VT); + } + llvm_unreachable("Unexpected boolean content enum!"); +} + +SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT, + bool isT, bool isO) { + EVT EltVT = VT.getScalarType(); + assert((EltVT.getSizeInBits() >= 64 || + (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && + "getConstant with a uint64_t value that doesn't fit in the type!"); + return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO); +} + +SDValue SelectionDAG::getConstant(const APInt &Val, const SDLoc &DL, EVT VT, + bool isT, bool isO) { + return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO); +} + +SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, + EVT VT, bool isT, bool isO) { + assert(VT.isInteger() && "Cannot create FP integer constant!"); + + EVT EltVT = VT.getScalarType(); + const ConstantInt *Elt = &Val; + + // In some cases the vector type is legal but the element type is illegal and + // needs to be promoted, for example v8i8 on ARM. In this case, promote the + // inserted value (the type does not need to match the vector element type). + // Any extra bits introduced will be truncated away. + if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == + TargetLowering::TypePromoteInteger) { + EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); + APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits()); + Elt = ConstantInt::get(*getContext(), NewVal); + } + // In other cases the element type is illegal and needs to be expanded, for + // example v2i64 on MIPS32. In this case, find the nearest legal type, split + // the value into n parts and use a vector type with n-times the elements. + // Then bitcast to the type requested. + // Legalizing constants too early makes the DAGCombiner's job harder so we + // only legalize if the DAG tells us we must produce legal types. + else if (NewNodesMustHaveLegalTypes && VT.isVector() && + TLI->getTypeAction(*getContext(), EltVT) == + TargetLowering::TypeExpandInteger) { + const APInt &NewVal = Elt->getValue(); + EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); + unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); + unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits; + EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts); + + // Check the temporary vector is the correct size. If this fails then + // getTypeToTransformTo() probably returned a type whose size (in bits) + // isn't a power-of-2 factor of the requested type size. 
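+    // Worked example: splatting i64 0x0000000100000002 as a v2i64 constant
+    // on a target that expands i64 (e.g. MIPS32, per the comment above)
+    // gives ViaEltVT = i32 and ViaVecVT = v4i32; EltParts becomes
+    // {0x2, 0x1} (little-endian part order), and the final BUILD_VECTOR is
+    // <0x2, 0x1, 0x2, 0x1>, bitcast back to v2i64.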
+ assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits()); + + SmallVector<SDValue, 2> EltParts; + for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { + EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) + .zextOrTrunc(ViaEltSizeInBits), DL, + ViaEltVT, isT, isO)); + } + + // EltParts is currently in little endian order. If we actually want + // big-endian order then reverse it now. + if (getDataLayout().isBigEndian()) + std::reverse(EltParts.begin(), EltParts.end()); + + // The elements must be reversed when the element order is different + // to the endianness of the elements (because the BITCAST is itself a + // vector shuffle in this situation). However, we do not need any code to + // perform this reversal because getConstant() is producing a vector + // splat. + // This situation occurs in MIPS MSA. + + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) + Ops.insert(Ops.end(), EltParts.begin(), EltParts.end()); + + SDValue V = getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops)); + return V; + } + + assert(Elt->getBitWidth() == EltVT.getSizeInBits() && + "APInt size does not match type size!"); + unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), None); + ID.AddPointer(Elt); + ID.AddBoolean(isO); + void *IP = nullptr; + SDNode *N = nullptr; + if ((N = FindNodeOrInsertPos(ID, DL, IP))) + if (!VT.isVector()) + return SDValue(N, 0); + + if (!N) { + N = newSDNode<ConstantSDNode>(isT, isO, Elt, EltVT); + CSEMap.InsertNode(N, IP); + InsertNode(N); + NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this); + } + + SDValue Result(N, 0); + if (VT.isVector()) + Result = getSplatBuildVector(VT, DL, Result); + + return Result; +} + +SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL, + bool isTarget) { + return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget); +} + +SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT, + const SDLoc &DL, bool LegalTypes) { + EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes); + return getConstant(Val, DL, ShiftVT); +} + +SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT, + bool isTarget) { + return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget); +} + +SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, + EVT VT, bool isTarget) { + assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); + + EVT EltVT = VT.getScalarType(); + + // Do the map lookup using the actual bit pattern for the floating point + // value, so that we don't have problems with 0.0 comparing equal to -0.0, and + // we don't have issues with SNANs. + unsigned Opc = isTarget ? 
ISD::TargetConstantFP : ISD::ConstantFP; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), None); + ID.AddPointer(&V); + void *IP = nullptr; + SDNode *N = nullptr; + if ((N = FindNodeOrInsertPos(ID, DL, IP))) + if (!VT.isVector()) + return SDValue(N, 0); + + if (!N) { + N = newSDNode<ConstantFPSDNode>(isTarget, &V, EltVT); + CSEMap.InsertNode(N, IP); + InsertNode(N); + } + + SDValue Result(N, 0); + if (VT.isVector()) + Result = getSplatBuildVector(VT, DL, Result); + NewSDValueDbgMsg(Result, "Creating fp constant: ", this); + return Result; +} + +SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT, + bool isTarget) { + EVT EltVT = VT.getScalarType(); + if (EltVT == MVT::f32) + return getConstantFP(APFloat((float)Val), DL, VT, isTarget); + else if (EltVT == MVT::f64) + return getConstantFP(APFloat(Val), DL, VT, isTarget); + else if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 || + EltVT == MVT::f16) { + bool Ignored; + APFloat APF = APFloat(Val); + APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, + &Ignored); + return getConstantFP(APF, DL, VT, isTarget); + } else + llvm_unreachable("Unsupported type in getConstantFP"); +} + +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, + EVT VT, int64_t Offset, bool isTargetGA, + unsigned TargetFlags) { + assert((TargetFlags == 0 || isTargetGA) && + "Cannot set target flags on target-independent globals"); + + // Truncate (with sign-extension) the offset value to the pointer size. + unsigned BitWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); + if (BitWidth < 64) + Offset = SignExtend64(Offset, BitWidth); + + unsigned Opc; + if (GV->isThreadLocal()) + Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress; + else + Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), None); + ID.AddPointer(GV); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<GlobalAddressSDNode>( + Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VT, Offset, TargetFlags); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { + unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), None); + ID.AddInteger(FI); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<FrameIndexSDNode>(FI, VT, isTarget); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, + unsigned TargetFlags) { + assert((TargetFlags == 0 || isTarget) && + "Cannot set target flags on target-independent jump tables"); + unsigned Opc = isTarget ? 
ISD::TargetJumpTable : ISD::JumpTable; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), None); + ID.AddInteger(JTI); + ID.AddInteger(TargetFlags); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<JumpTableSDNode>(JTI, VT, isTarget, TargetFlags); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, + unsigned Alignment, int Offset, + bool isTarget, + unsigned TargetFlags) { + assert((TargetFlags == 0 || isTarget) && + "Cannot set target flags on target-independent globals"); + if (Alignment == 0) + Alignment = MF->getFunction().hasOptSize() + ? getDataLayout().getABITypeAlignment(C->getType()) + : getDataLayout().getPrefTypeAlignment(C->getType()); + unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), None); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + ID.AddPointer(C); + ID.AddInteger(TargetFlags); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment, + TargetFlags); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, + unsigned Alignment, int Offset, + bool isTarget, + unsigned TargetFlags) { + assert((TargetFlags == 0 || isTarget) && + "Cannot set target flags on target-independent globals"); + if (Alignment == 0) + Alignment = getDataLayout().getPrefTypeAlignment(C->getType()); + unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), None); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + C->addSelectionDAGCSEId(ID); + ID.AddInteger(TargetFlags); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment, + TargetFlags); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, + unsigned TargetFlags) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None); + ID.AddInteger(Index); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<TargetIndexSDNode>(Index, VT, Offset, TargetFlags); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None); + ID.AddPointer(MBB); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<BasicBlockSDNode>(MBB); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getValueType(EVT VT) { + if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >= + ValueTypeNodes.size()) + ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1); + + SDNode *&N = VT.isExtended() ? 
+ ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy]; + + if (N) return SDValue(N, 0); + N = newSDNode<VTSDNode>(VT); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { + SDNode *&N = ExternalSymbols[Sym]; + if (N) return SDValue(N, 0); + N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, VT); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) { + SDNode *&N = MCSymbols[Sym]; + if (N) + return SDValue(N, 0); + N = newSDNode<MCSymbolSDNode>(Sym, VT); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, + unsigned TargetFlags) { + SDNode *&N = + TargetExternalSymbols[std::pair<std::string, unsigned>(Sym, TargetFlags)]; + if (N) return SDValue(N, 0); + N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { + if ((unsigned)Cond >= CondCodeNodes.size()) + CondCodeNodes.resize(Cond+1); + + if (!CondCodeNodes[Cond]) { + auto *N = newSDNode<CondCodeSDNode>(Cond); + CondCodeNodes[Cond] = N; + InsertNode(N); + } + + return SDValue(CondCodeNodes[Cond], 0); +} + +/// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that +/// point at N1 to point at N2 and indices that point at N2 to point at N1. +static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) { + std::swap(N1, N2); + ShuffleVectorSDNode::commuteMask(M); +} + +SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, + SDValue N2, ArrayRef<int> Mask) { + assert(VT.getVectorNumElements() == Mask.size() && + "Must have the same number of vector elements as mask elements!"); + assert(VT == N1.getValueType() && VT == N2.getValueType() && + "Invalid VECTOR_SHUFFLE"); + + // Canonicalize shuffle undef, undef -> undef + if (N1.isUndef() && N2.isUndef()) + return getUNDEF(VT); + + // Validate that all indices in Mask are within the range of the elements + // input to the shuffle. + int NElts = Mask.size(); + assert(llvm::all_of(Mask, + [&](int M) { return M < (NElts * 2) && M >= -1; }) && + "Index out of range"); + + // Copy the mask so we can do any needed cleanup. + SmallVector<int, 8> MaskVec(Mask.begin(), Mask.end()); + + // Canonicalize shuffle v, v -> v, undef + if (N1 == N2) { + N2 = getUNDEF(VT); + for (int i = 0; i != NElts; ++i) + if (MaskVec[i] >= NElts) MaskVec[i] -= NElts; + } + + // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. + if (N1.isUndef()) + commuteShuffle(N1, N2, MaskVec); + + if (TLI->hasVectorBlend()) { + // If shuffling a splat, try to blend the splat instead. We do this here so + // that even when this arises during lowering we don't have to re-handle it. + auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { + BitVector UndefElements; + SDValue Splat = BV->getSplatValue(&UndefElements); + if (!Splat) + return; + + for (int i = 0; i < NElts; ++i) { + if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)) + continue; + + // If this input comes from undef, mark it as such. + if (UndefElements[MaskVec[i] - Offset]) { + MaskVec[i] = -1; + continue; + } + + // If we can blend a non-undef lane, use that instead. 
+        if (!UndefElements[i])
+          MaskVec[i] = i + Offset;
+      }
+    };
+    if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+      BlendSplat(N1BV, 0);
+    if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
+      BlendSplat(N2BV, NElts);
+  }
+
+  // Canonicalize all indices into lhs -> shuffle lhs, undef
+  // Canonicalize all indices into rhs -> shuffle rhs, undef
+  bool AllLHS = true, AllRHS = true;
+  bool N2Undef = N2.isUndef();
+  for (int i = 0; i != NElts; ++i) {
+    if (MaskVec[i] >= NElts) {
+      if (N2Undef)
+        MaskVec[i] = -1;
+      else
+        AllLHS = false;
+    } else if (MaskVec[i] >= 0) {
+      AllRHS = false;
+    }
+  }
+  if (AllLHS && AllRHS)
+    return getUNDEF(VT);
+  if (AllLHS && !N2Undef)
+    N2 = getUNDEF(VT);
+  if (AllRHS) {
+    N1 = getUNDEF(VT);
+    commuteShuffle(N1, N2, MaskVec);
+  }
+  // Reset our undef status after accounting for the mask.
+  N2Undef = N2.isUndef();
+  // Re-check whether both sides ended up undef.
+  if (N1.isUndef() && N2Undef)
+    return getUNDEF(VT);
+
+  // If this is an identity shuffle, return that node.
+  bool Identity = true, AllSame = true;
+  for (int i = 0; i != NElts; ++i) {
+    if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false;
+    if (MaskVec[i] != MaskVec[0]) AllSame = false;
+  }
+  if (Identity && NElts)
+    return N1;
+
+  // Shuffling a constant splat doesn't change the result.
+  if (N2Undef) {
+    SDValue V = N1;
+
+    // Look through any bitcasts. We check that these don't change the number
+    // (and size) of elements and just change their types.
+    while (V.getOpcode() == ISD::BITCAST)
+      V = V->getOperand(0);
+
+    // A splat should always show up as a build vector node.
+    if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+      BitVector UndefElements;
+      SDValue Splat = BV->getSplatValue(&UndefElements);
+      // If this is a splat of an undef, shuffling it is also undef.
+      if (Splat && Splat.isUndef())
+        return getUNDEF(VT);
+
+      bool SameNumElts =
+          V.getValueType().getVectorNumElements() == VT.getVectorNumElements();
+
+      // We only have a splat which can skip shuffles if there is a splatted
+      // value and no undef lanes rearranged by the shuffle.
+      if (Splat && UndefElements.none()) {
+        // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
+        // number of elements match or the value splatted is a zero constant.
+        if (SameNumElts)
+          return N1;
+        if (auto *C = dyn_cast<ConstantSDNode>(Splat))
+          if (C->isNullValue())
+            return N1;
+      }
+
+      // If the shuffle itself creates a splat, build the vector directly.
+      if (AllSame && SameNumElts) {
+        EVT BuildVT = BV->getValueType(0);
+        const SDValue &Splatted = BV->getOperand(MaskVec[0]);
+        SDValue NewBV = getSplatBuildVector(BuildVT, dl, Splatted);
+
+        // We may have jumped through bitcasts, so the type of the
+        // BUILD_VECTOR may not match the type of the shuffle.
+        if (BuildVT != VT)
+          NewBV = getNode(ISD::BITCAST, dl, VT, NewBV);
+        return NewBV;
+      }
+    }
+  }
+
+  FoldingSetNodeID ID;
+  SDValue Ops[2] = { N1, N2 };
+  AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops);
+  for (int i = 0; i != NElts; ++i)
+    ID.AddInteger(MaskVec[i]);
+
+  void* IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+    return SDValue(E, 0);
+
+  // Allocate the mask array for the node out of the BumpPtrAllocator, since
+  // SDNode doesn't have access to it. This memory will be "leaked" when
+  // the node is deallocated, but recovered when the NodeAllocator is released.
+ int *MaskAlloc = OperandAllocator.Allocate<int>(NElts); + llvm::copy(MaskVec, MaskAlloc); + + auto *N = newSDNode<ShuffleVectorSDNode>(VT, dl.getIROrder(), + dl.getDebugLoc(), MaskAlloc); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V = SDValue(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { + EVT VT = SV.getValueType(0); + SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end()); + ShuffleVectorSDNode::commuteMask(MaskVec); + + SDValue Op0 = SV.getOperand(0); + SDValue Op1 = SV.getOperand(1); + return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec); +} + +SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::Register, getVTList(VT), None); + ID.AddInteger(RegNo); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<RegisterSDNode>(RegNo, VT); + N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None); + ID.AddPointer(RegMask); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<RegisterMaskSDNode>(RegMask); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root, + MCSymbol *Label) { + return getLabelNode(ISD::EH_LABEL, dl, Root, Label); +} + +SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl, + SDValue Root, MCSymbol *Label) { + FoldingSetNodeID ID; + SDValue Ops[] = { Root }; + AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), Ops); + ID.AddPointer(Label); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + auto *N = + newSDNode<LabelSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), Label); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, + int64_t Offset, bool isTarget, + unsigned TargetFlags) { + unsigned Opc = isTarget ? 
ISD::TargetBlockAddress : ISD::BlockAddress; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), None); + ID.AddPointer(BA); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<BlockAddressSDNode>(Opc, VT, BA, Offset, TargetFlags); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getSrcValue(const Value *V) { + assert((!V || V->getType()->isPointerTy()) && + "SrcValue is not a pointer?"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None); + ID.AddPointer(V); + + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<SrcValueSDNode>(V); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getMDNode(const MDNode *MD) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None); + ID.AddPointer(MD); + + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<MDNodeSDNode>(MD); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) { + if (VT == V.getValueType()) + return V; + + return getNode(ISD::BITCAST, SDLoc(V), VT, V); +} + +SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, + unsigned SrcAS, unsigned DestAS) { + SDValue Ops[] = {Ptr}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), Ops); + ID.AddInteger(SrcAS); + ID.AddInteger(DestAS); + + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<AddrSpaceCastSDNode>(dl.getIROrder(), dl.getDebugLoc(), + VT, SrcAS, DestAS); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +/// getShiftAmountOperand - Return the specified value casted to +/// the target's desired shift amount type. 
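+/// For example, if the target's shift amount type for the LHS is i32, an i8
+/// shift amount is zero-extended to i32 and an i64 amount is truncated, while
+/// vector shift amounts are returned unchanged.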
+SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { + EVT OpTy = Op.getValueType(); + EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout()); + if (OpTy == ShTy || OpTy.isVector()) return Op; + + return getZExtOrTrunc(Op, SDLoc(Op), ShTy); +} + +SDValue SelectionDAG::expandVAArg(SDNode *Node) { + SDLoc dl(Node); + const TargetLowering &TLI = getTargetLoweringInfo(); + const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); + EVT VT = Node->getValueType(0); + SDValue Tmp1 = Node->getOperand(0); + SDValue Tmp2 = Node->getOperand(1); + const MaybeAlign MA(Node->getConstantOperandVal(3)); + + SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, + Tmp2, MachinePointerInfo(V)); + SDValue VAList = VAListLoad; + + if (MA && *MA > TLI.getMinStackArgumentAlignment()) { + VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, + getConstant(MA->value() - 1, dl, VAList.getValueType())); + + VAList = + getNode(ISD::AND, dl, VAList.getValueType(), VAList, + getConstant(-(int64_t)MA->value(), dl, VAList.getValueType())); + } + + // Increment the pointer, VAList, to the next vaarg + Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, + getConstant(getDataLayout().getTypeAllocSize( + VT.getTypeForEVT(*getContext())), + dl, VAList.getValueType())); + // Store the incremented VAList to the legalized pointer + Tmp1 = + getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, MachinePointerInfo(V)); + // Load the actual argument out of the pointer VAList + return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo()); +} + +SDValue SelectionDAG::expandVACopy(SDNode *Node) { + SDLoc dl(Node); + const TargetLowering &TLI = getTargetLoweringInfo(); + // This defaults to loading a pointer from the input and storing it to the + // output, returning the chain. + const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); + const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); + SDValue Tmp1 = + getLoad(TLI.getPointerTy(getDataLayout()), dl, Node->getOperand(0), + Node->getOperand(2), MachinePointerInfo(VS)); + return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), + MachinePointerInfo(VD)); +} + +SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { + MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); + unsigned ByteSize = VT.getStoreSize(); + Type *Ty = VT.getTypeForEVT(*getContext()); + unsigned StackAlign = + std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign); + + int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); + return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); +} + +SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { + unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize()); + Type *Ty1 = VT1.getTypeForEVT(*getContext()); + Type *Ty2 = VT2.getTypeForEVT(*getContext()); + const DataLayout &DL = getDataLayout(); + unsigned Align = + std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2)); + + MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); + int FrameIdx = MFI.CreateStackObject(Bytes, Align, false); + return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); +} + +SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, + ISD::CondCode Cond, const SDLoc &dl) { + EVT OpVT = N1.getValueType(); + + // These setcc operations always fold. 
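+  // e.g. FoldSetCC(VT, X, Y, ISD::SETTRUE, dl) folds to the true boolean
+  // constant and ISD::SETFALSE to the false one, regardless of X and Y.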
+ switch (Cond) { + default: break; + case ISD::SETFALSE: + case ISD::SETFALSE2: return getBoolConstant(false, dl, VT, OpVT); + case ISD::SETTRUE: + case ISD::SETTRUE2: return getBoolConstant(true, dl, VT, OpVT); + + case ISD::SETOEQ: + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETONE: + case ISD::SETO: + case ISD::SETUO: + case ISD::SETUEQ: + case ISD::SETUNE: + assert(!OpVT.isInteger() && "Illegal setcc for integer!"); + break; + } + + if (OpVT.isInteger()) { + // For EQ and NE, we can always pick a value for the undef to make the + // predicate pass or fail, so we can return undef. + // Matches behavior in llvm::ConstantFoldCompareInstruction. + // icmp eq/ne X, undef -> undef. + if ((N1.isUndef() || N2.isUndef()) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) + return getUNDEF(VT); + + // If both operands are undef, we can return undef for int comparison. + // icmp undef, undef -> undef. + if (N1.isUndef() && N2.isUndef()) + return getUNDEF(VT); + + // icmp X, X -> true/false + // icmp X, undef -> true/false because undef could be X. + if (N1 == N2) + return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT); + } + + if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) { + const APInt &C2 = N2C->getAPIntValue(); + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) { + const APInt &C1 = N1C->getAPIntValue(); + + switch (Cond) { + default: llvm_unreachable("Unknown integer setcc!"); + case ISD::SETEQ: return getBoolConstant(C1 == C2, dl, VT, OpVT); + case ISD::SETNE: return getBoolConstant(C1 != C2, dl, VT, OpVT); + case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT); + case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT); + case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT); + case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT); + case ISD::SETLT: return getBoolConstant(C1.slt(C2), dl, VT, OpVT); + case ISD::SETGT: return getBoolConstant(C1.sgt(C2), dl, VT, OpVT); + case ISD::SETLE: return getBoolConstant(C1.sle(C2), dl, VT, OpVT); + case ISD::SETGE: return getBoolConstant(C1.sge(C2), dl, VT, OpVT); + } + } + } + + auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2); + + if (N1CFP && N2CFP) { + APFloat::cmpResult R = N1CFP->getValueAPF().compare(N2CFP->getValueAPF()); + switch (Cond) { + default: break; + case ISD::SETEQ: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETNE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpLessThan, dl, VT, + OpVT); + case ISD::SETLT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, + OpVT); + case ISD::SETGT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, + VT, OpVT); + case ISD::SETLE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || + R==APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETGE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpEqual, dl, VT, 
OpVT); + case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpLessThan, dl, VT, + OpVT); + case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl, + VT, OpVT); + case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT, + OpVT); + } + } else if (N1CFP && OpVT.isSimple() && !N2.isUndef()) { + // Ensure that the constant occurs on the RHS. + ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); + if (!TLI->isCondCodeLegal(SwappedCond, OpVT.getSimpleVT())) + return SDValue(); + return getSetCC(dl, VT, N2, N1, SwappedCond); + } else if ((N2CFP && N2CFP->getValueAPF().isNaN()) || + (OpVT.isFloatingPoint() && (N1.isUndef() || N2.isUndef()))) { + // If an operand is known to be a nan (or undef that could be a nan), we can + // fold it. + // Choosing NaN for the undef will always make unordered comparison succeed + // and ordered comparison fails. + // Matches behavior in llvm::ConstantFoldCompareInstruction. + switch (ISD::getUnorderedFlavor(Cond)) { + default: + llvm_unreachable("Unknown flavor!"); + case 0: // Known false. + return getBoolConstant(false, dl, VT, OpVT); + case 1: // Known true. + return getBoolConstant(true, dl, VT, OpVT); + case 2: // Undefined. + return getUNDEF(VT); + } + } + + // Could not fold it. + return SDValue(); +} + +/// See if the specified operand can be simplified with the knowledge that only +/// the bits specified by DemandedBits are used. +/// TODO: really we should be making this into the DAG equivalent of +/// SimplifyMultipleUseDemandedBits and not generate any new nodes. +SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) { + EVT VT = V.getValueType(); + APInt DemandedElts = VT.isVector() + ? APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + return GetDemandedBits(V, DemandedBits, DemandedElts); +} + +/// See if the specified operand can be simplified with the knowledge that only +/// the bits specified by DemandedBits are used in the elements specified by +/// DemandedElts. +/// TODO: really we should be making this into the DAG equivalent of +/// SimplifyMultipleUseDemandedBits and not generate any new nodes. +SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, + const APInt &DemandedElts) { + switch (V.getOpcode()) { + default: + break; + case ISD::Constant: { + auto *CV = cast<ConstantSDNode>(V.getNode()); + assert(CV && "Const value should be ConstSDNode."); + const APInt &CVal = CV->getAPIntValue(); + APInt NewVal = CVal & DemandedBits; + if (NewVal != CVal) + return getConstant(NewVal, SDLoc(V), V.getValueType()); + break; + } + case ISD::OR: + case ISD::XOR: + case ISD::SIGN_EXTEND_INREG: + return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts, + *this, 0); + case ISD::SRL: + // Only look at single-use SRLs. + if (!V.getNode()->hasOneUse()) + break; + if (auto *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { + // See if we can recursively simplify the LHS. 
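+      // e.g. if DemandedBits for (srl X, 4) is 0x00FF, then only the bits
+      // 0x0FF0 of X are demanded, and X may simplify under that smaller mask.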
+ unsigned Amt = RHSC->getZExtValue(); + + // Watch out for shift count overflow though. + if (Amt >= DemandedBits.getBitWidth()) + break; + APInt SrcDemandedBits = DemandedBits << Amt; + if (SDValue SimplifyLHS = + GetDemandedBits(V.getOperand(0), SrcDemandedBits)) + return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, + V.getOperand(1)); + } + break; + case ISD::AND: { + // X & -1 -> X (ignoring bits which aren't demanded). + // Also handle the case where masked out bits in X are known to be zero. + if (ConstantSDNode *RHSC = isConstOrConstSplat(V.getOperand(1))) { + const APInt &AndVal = RHSC->getAPIntValue(); + if (DemandedBits.isSubsetOf(AndVal) || + DemandedBits.isSubsetOf(computeKnownBits(V.getOperand(0)).Zero | + AndVal)) + return V.getOperand(0); + } + break; + } + case ISD::ANY_EXTEND: { + SDValue Src = V.getOperand(0); + unsigned SrcBitWidth = Src.getScalarValueSizeInBits(); + // Being conservative here - only peek through if we only demand bits in the + // non-extended source (even though the extended bits are technically + // undef). + if (DemandedBits.getActiveBits() > SrcBitWidth) + break; + APInt SrcDemandedBits = DemandedBits.trunc(SrcBitWidth); + if (SDValue DemandedSrc = GetDemandedBits(Src, SrcDemandedBits)) + return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc); + break; + } + } + return SDValue(); +} + +/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We +/// use this predicate to simplify operations downstream. +bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { + unsigned BitWidth = Op.getScalarValueSizeInBits(); + return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth); +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use +/// this predicate to simplify operations downstream. Mask is known to be zero +/// for bits that V cannot have. +bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, + unsigned Depth) const { + EVT VT = V.getValueType(); + APInt DemandedElts = VT.isVector() + ? APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + return MaskedValueIsZero(V, Mask, DemandedElts, Depth); +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in +/// DemandedElts. We use this predicate to simplify operations downstream. +/// Mask is known to be zero for bits that V cannot have. +bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, + const APInt &DemandedElts, + unsigned Depth) const { + return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero); +} + +/// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'. +bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, + unsigned Depth) const { + return Mask.isSubsetOf(computeKnownBits(V, Depth).One); +} + +/// isSplatValue - Return true if the vector V has the same value +/// across all DemandedElts. +bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, + APInt &UndefElts) { + if (!DemandedElts) + return false; // No demanded elts, better to assume we don't know anything. 
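+  // UndefElts reports which lanes of V are undef. e.g. for
+  //   V = BUILD_VECTOR X, undef, X, X   and DemandedElts = 0b1101
+  // this returns true (every demanded lane is X) with UndefElts = 0b0010.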
+ + EVT VT = V.getValueType(); + assert(VT.isVector() && "Vector type expected"); + + unsigned NumElts = VT.getVectorNumElements(); + assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch"); + UndefElts = APInt::getNullValue(NumElts); + + switch (V.getOpcode()) { + case ISD::BUILD_VECTOR: { + SDValue Scl; + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Op = V.getOperand(i); + if (Op.isUndef()) { + UndefElts.setBit(i); + continue; + } + if (!DemandedElts[i]) + continue; + if (Scl && Scl != Op) + return false; + Scl = Op; + } + return true; + } + case ISD::VECTOR_SHUFFLE: { + // Check if this is a shuffle node doing a splat. + // TODO: Do we need to handle shuffle(splat, undef, mask)? + int SplatIndex = -1; + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask(); + for (int i = 0; i != (int)NumElts; ++i) { + int M = Mask[i]; + if (M < 0) { + UndefElts.setBit(i); + continue; + } + if (!DemandedElts[i]) + continue; + if (0 <= SplatIndex && SplatIndex != M) + return false; + SplatIndex = M; + } + return true; + } + case ISD::EXTRACT_SUBVECTOR: { + SDValue Src = V.getOperand(0); + ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(V.getOperand(1)); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { + // Offset the demanded elts by the subvector index. + uint64_t Idx = SubIdx->getZExtValue(); + APInt UndefSrcElts; + APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + if (isSplatValue(Src, DemandedSrc, UndefSrcElts)) { + UndefElts = UndefSrcElts.extractBits(NumElts, Idx); + return true; + } + } + break; + } + case ISD::ADD: + case ISD::SUB: + case ISD::AND: { + APInt UndefLHS, UndefRHS; + SDValue LHS = V.getOperand(0); + SDValue RHS = V.getOperand(1); + if (isSplatValue(LHS, DemandedElts, UndefLHS) && + isSplatValue(RHS, DemandedElts, UndefRHS)) { + UndefElts = UndefLHS | UndefRHS; + return true; + } + break; + } + } + + return false; +} + +/// Helper wrapper to main isSplatValue function. +bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) { + EVT VT = V.getValueType(); + assert(VT.isVector() && "Vector type expected"); + unsigned NumElts = VT.getVectorNumElements(); + + APInt UndefElts; + APInt DemandedElts = APInt::getAllOnesValue(NumElts); + return isSplatValue(V, DemandedElts, UndefElts) && + (AllowUndefs || !UndefElts); +} + +SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { + V = peekThroughExtractSubvectors(V); + + EVT VT = V.getValueType(); + unsigned Opcode = V.getOpcode(); + switch (Opcode) { + default: { + APInt UndefElts; + APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + if (isSplatValue(V, DemandedElts, UndefElts)) { + // Handle case where all demanded elements are UNDEF. + if (DemandedElts.isSubsetOf(UndefElts)) { + SplatIdx = 0; + return getUNDEF(VT); + } + SplatIdx = (UndefElts & DemandedElts).countTrailingOnes(); + return V; + } + break; + } + case ISD::VECTOR_SHUFFLE: { + // Check if this is a shuffle node doing a splat. + // TODO - remove this and rely purely on SelectionDAG::isSplatValue, + // getTargetVShiftNode currently struggles without the splat source. 
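+    // e.g. a <4 x i32> shuffle of (A, B) whose mask splats element 6 returns
+    // B with SplatIdx = 2: 6 / 4 selects operand 1 and 6 % 4 its lane 2.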
+ auto *SVN = cast<ShuffleVectorSDNode>(V); + if (!SVN->isSplat()) + break; + int Idx = SVN->getSplatIndex(); + int NumElts = V.getValueType().getVectorNumElements(); + SplatIdx = Idx % NumElts; + return V.getOperand(Idx / NumElts); + } + } + + return SDValue(); +} + +SDValue SelectionDAG::getSplatValue(SDValue V) { + int SplatIdx; + if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx)) + return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), + SrcVector.getValueType().getScalarType(), SrcVector, + getIntPtrConstant(SplatIdx, SDLoc(V))); + return SDValue(); +} + +/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that +/// is less than the element bit-width of the shift node, return it. +static const APInt *getValidShiftAmountConstant(SDValue V) { + unsigned BitWidth = V.getScalarValueSizeInBits(); + if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) { + // Shifting more than the bitwidth is not valid. + const APInt &ShAmt = SA->getAPIntValue(); + if (ShAmt.ult(BitWidth)) + return &ShAmt; + } + return nullptr; +} + +/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less +/// than the element bit-width of the shift node, return the minimum value. +static const APInt *getValidMinimumShiftAmountConstant(SDValue V) { + unsigned BitWidth = V.getScalarValueSizeInBits(); + auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); + if (!BV) + return nullptr; + const APInt *MinShAmt = nullptr; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i)); + if (!SA) + return nullptr; + // Shifting more than the bitwidth is not valid. + const APInt &ShAmt = SA->getAPIntValue(); + if (ShAmt.uge(BitWidth)) + return nullptr; + if (MinShAmt && MinShAmt->ule(ShAmt)) + continue; + MinShAmt = &ShAmt; + } + return MinShAmt; +} + +/// Determine which bits of Op are known to be either zero or one and return +/// them in Known. For vectors, the known bits are those that are shared by +/// every vector element. +KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const { + EVT VT = Op.getValueType(); + APInt DemandedElts = VT.isVector() + ? APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + return computeKnownBits(Op, DemandedElts, Depth); +} + +/// Determine which bits of Op are known to be either zero or one and return +/// them in Known. The DemandedElts argument allows us to only collect the known +/// bits that are shared by the requested vector elements. +KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, + unsigned Depth) const { + unsigned BitWidth = Op.getScalarValueSizeInBits(); + + KnownBits Known(BitWidth); // Don't know anything. + + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { + // We know all of the bits for a constant! + Known.One = C->getAPIntValue(); + Known.Zero = ~Known.One; + return Known; + } + if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) { + // We know all of the bits for a constant fp! + Known.One = C->getValueAPF().bitcastToAPInt(); + Known.Zero = ~Known.One; + return Known; + } + + if (Depth >= MaxRecursionDepth) + return Known; // Limit search depth. + + KnownBits Known2; + unsigned NumElts = DemandedElts.getBitWidth(); + assert((!Op.getValueType().isVector() || + NumElts == Op.getValueType().getVectorNumElements()) && + "Unexpected vector size"); + + if (!DemandedElts) + return Known; // No demanded elts, better to assume we don't know anything. 
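+  // Each bit is tracked as known-zero (set in Known.Zero), known-one (set in
+  // Known.One) or unknown (set in neither). e.g. for i8 (and X, 0x0F) the
+  // high nibble is known zero while the low nibble stays unknown.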
+ + unsigned Opcode = Op.getOpcode(); + switch (Opcode) { + case ISD::BUILD_VECTOR: + // Collect the known bits that are shared by every demanded vector element. + Known.Zero.setAllBits(); Known.One.setAllBits(); + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + if (!DemandedElts[i]) + continue; + + SDValue SrcOp = Op.getOperand(i); + Known2 = computeKnownBits(SrcOp, Depth + 1); + + // BUILD_VECTOR can implicitly truncate sources, we must handle this. + if (SrcOp.getValueSizeInBits() != BitWidth) { + assert(SrcOp.getValueSizeInBits() > BitWidth && + "Expected BUILD_VECTOR implicit truncation"); + Known2 = Known2.trunc(BitWidth); + } + + // Known bits are the values that are shared by every demanded element. + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + + // If we don't know any bits, early out. + if (Known.isUnknown()) + break; + } + break; + case ISD::VECTOR_SHUFFLE: { + // Collect the known bits that are shared by every vector element referenced + // by the shuffle. + APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); + Known.Zero.setAllBits(); Known.One.setAllBits(); + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); + assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); + for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i]) + continue; + + int M = SVN->getMaskElt(i); + if (M < 0) { + // For UNDEF elements, we don't know anything about the common state of + // the shuffle result. + Known.resetAll(); + DemandedLHS.clearAllBits(); + DemandedRHS.clearAllBits(); + break; + } + + if ((unsigned)M < NumElts) + DemandedLHS.setBit((unsigned)M % NumElts); + else + DemandedRHS.setBit((unsigned)M % NumElts); + } + // Known bits are the values that are shared by every demanded element. + if (!!DemandedLHS) { + SDValue LHS = Op.getOperand(0); + Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + } + // If we don't know any bits, early out. + if (Known.isUnknown()) + break; + if (!!DemandedRHS) { + SDValue RHS = Op.getOperand(1); + Known2 = computeKnownBits(RHS, DemandedRHS, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + } + break; + } + case ISD::CONCAT_VECTORS: { + // Split DemandedElts and test each of the demanded subvectors. + Known.Zero.setAllBits(); Known.One.setAllBits(); + EVT SubVectorVT = Op.getOperand(0).getValueType(); + unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); + unsigned NumSubVectors = Op.getNumOperands(); + for (unsigned i = 0; i != NumSubVectors; ++i) { + APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts); + DemandedSub = DemandedSub.trunc(NumSubVectorElts); + if (!!DemandedSub) { + SDValue Sub = Op.getOperand(i); + Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + } + // If we don't know any bits, early out. + if (Known.isUnknown()) + break; + } + break; + } + case ISD::INSERT_SUBVECTOR: { + // If we know the element index, demand any elements from the subvector and + // the remainder from the src its inserted into, otherwise demand them all. 
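+    // e.g. inserting a v2i32 subvector into a v4i32 at index 2: demanded
+    // lanes 2-3 map onto the subvector, lanes 0-1 onto the source vector.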
+    SDValue Src = Op.getOperand(0);
+    SDValue Sub = Op.getOperand(1);
+    ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+    if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
+      Known.One.setAllBits();
+      Known.Zero.setAllBits();
+      uint64_t Idx = SubIdx->getZExtValue();
+      APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+      if (!!DemandedSubElts) {
+        Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1);
+        if (Known.isUnknown())
+          break; // early-out.
+      }
+      APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
+      APInt DemandedSrcElts = DemandedElts & ~SubMask;
+      if (!!DemandedSrcElts) {
+        Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
+        Known.One &= Known2.One;
+        Known.Zero &= Known2.Zero;
+      }
+    } else {
+      Known = computeKnownBits(Sub, Depth + 1);
+      if (Known.isUnknown())
+        break; // early-out.
+      Known2 = computeKnownBits(Src, Depth + 1);
+      Known.One &= Known2.One;
+      Known.Zero &= Known2.Zero;
+    }
+    break;
+  }
+  case ISD::EXTRACT_SUBVECTOR: {
+    // If we know the element index, just demand those subvector elements,
+    // otherwise demand them all.
+    SDValue Src = Op.getOperand(0);
+    ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+    APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts);
+    if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+      // Offset the demanded elts by the subvector index.
+      uint64_t Idx = SubIdx->getZExtValue();
+      DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+    }
+    Known = computeKnownBits(Src, DemandedSrc, Depth + 1);
+    break;
+  }
+  case ISD::SCALAR_TO_VECTOR: {
+    // We know as much about scalar_to_vector as we know about its source,
+    // which becomes the first element of an otherwise unknown vector.
+    if (DemandedElts != 1)
+      break;
+
+    SDValue N0 = Op.getOperand(0);
+    Known = computeKnownBits(N0, Depth + 1);
+    if (N0.getValueSizeInBits() != BitWidth)
+      Known = Known.trunc(BitWidth);
+
+    break;
+  }
+  case ISD::BITCAST: {
+    SDValue N0 = Op.getOperand(0);
+    EVT SubVT = N0.getValueType();
+    unsigned SubBitWidth = SubVT.getScalarSizeInBits();
+
+    // Ignore bitcasts from unsupported types.
+    if (!(SubVT.isInteger() || SubVT.isFloatingPoint()))
+      break;
+
+    // Fast handling of 'identity' bitcasts.
+    if (BitWidth == SubBitWidth) {
+      Known = computeKnownBits(N0, DemandedElts, Depth + 1);
+      break;
+    }
+
+    bool IsLE = getDataLayout().isLittleEndian();
+
+    // Bitcast 'small element' vector to 'large element' scalar/vector.
+    if ((BitWidth % SubBitWidth) == 0) {
+      assert(N0.getValueType().isVector() && "Expected bitcast from vector");
+
+      // Collect known bits for the (larger) output by collecting the known
+      // bits from each set of sub elements and shift these into place.
+      // We need to separately call computeKnownBits for each set of
+      // sub elements as the knownbits for each is likely to be different.
+      unsigned SubScale = BitWidth / SubBitWidth;
+      APInt SubDemandedElts(NumElts * SubScale, 0);
+      for (unsigned i = 0; i != NumElts; ++i)
+        if (DemandedElts[i])
+          SubDemandedElts.setBit(i * SubScale);
+
+      for (unsigned i = 0; i != SubScale; ++i) {
+        Known2 = computeKnownBits(N0, SubDemandedElts.shl(i),
+                                  Depth + 1);
+        unsigned Shifts = IsLE ?
i : SubScale - 1 - i; + Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts); + Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * Shifts); + } + } + + // Bitcast 'large element' scalar/vector to 'small element' vector. + if ((SubBitWidth % BitWidth) == 0) { + assert(Op.getValueType().isVector() && "Expected bitcast to vector"); + + // Collect known bits for the (smaller) output by collecting the known + // bits from the overlapping larger input elements and extracting the + // sub sections we actually care about. + unsigned SubScale = SubBitWidth / BitWidth; + APInt SubDemandedElts(NumElts / SubScale, 0); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) + SubDemandedElts.setBit(i / SubScale); + + Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1); + + Known.Zero.setAllBits(); Known.One.setAllBits(); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) { + unsigned Shifts = IsLE ? i : NumElts - 1 - i; + unsigned Offset = (Shifts % SubScale) * BitWidth; + Known.One &= Known2.One.lshr(Offset).trunc(BitWidth); + Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth); + // If we don't know any bits, early out. + if (Known.isUnknown()) + break; + } + } + break; + } + case ISD::AND: + // If either the LHS or the RHS are Zero, the result is zero. + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + // Output known-1 bits are only known if set in both the LHS & RHS. + Known.One &= Known2.One; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + Known.Zero |= Known2.Zero; + break; + case ISD::OR: + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + Known.Zero &= Known2.Zero; + // Output known-1 are known to be set if set in either the LHS | RHS. + Known.One |= Known2.One; + break; + case ISD::XOR: { + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); + Known.Zero = KnownZeroOut; + break; + } + case ISD::MUL: { + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conservative estimate for high known-0 bits. + // More trickiness is possible, but this is sufficient for the + // interesting case of alignment computation. + unsigned TrailZ = Known.countMinTrailingZeros() + + Known2.countMinTrailingZeros(); + unsigned LeadZ = std::max(Known.countMinLeadingZeros() + + Known2.countMinLeadingZeros(), + BitWidth) - BitWidth; + + Known.resetAll(); + Known.Zero.setLowBits(std::min(TrailZ, BitWidth)); + Known.Zero.setHighBits(std::min(LeadZ, BitWidth)); + break; + } + case ISD::UDIV: { + // For the purposes of computing leading zeros we can conservatively + // treat a udiv as a logical right shift by the power of 2 known to + // be less than the denominator. 
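+    // e.g. in i32, if the numerator has at least 8 leading zeros (< 2^24) and
+    // the divisor is known to have bit 4 set (so is >= 16 = 2^4), then the
+    // quotient is < 2^20 and at least 12 leading zeros are known.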
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + unsigned LeadZ = Known2.countMinLeadingZeros(); + + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros(); + if (RHSMaxLeadingZeros != BitWidth) + LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1); + + Known.Zero.setHighBits(LeadZ); + break; + } + case ISD::SELECT: + case ISD::VSELECT: + Known = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1); + // If we don't know any bits, early out. + if (Known.isUnknown()) + break; + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth+1); + + // Only known if known in both the LHS and RHS. + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + break; + case ISD::SELECT_CC: + Known = computeKnownBits(Op.getOperand(3), DemandedElts, Depth+1); + // If we don't know any bits, early out. + if (Known.isUnknown()) + break; + Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1); + + // Only known if known in both the LHS and RHS. + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + break; + case ISD::SMULO: + case ISD::UMULO: + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: + if (Op.getResNo() != 1) + break; + // The boolean result conforms to getBooleanContents. + // If we know the result of a setcc has the top bits zero, use this info. + // We know that we have an integer-based boolean since these operations + // are only available for integer. + if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + Known.Zero.setBitsFrom(1); + break; + case ISD::SETCC: + // If we know the result of a setcc has the top bits zero, use this info. + if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + Known.Zero.setBitsFrom(1); + break; + case ISD::SHL: + if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + unsigned Shift = ShAmt->getZExtValue(); + Known.Zero <<= Shift; + Known.One <<= Shift; + // Low bits are known zero. + Known.Zero.setLowBits(Shift); + } + break; + case ISD::SRL: + if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + unsigned Shift = ShAmt->getZExtValue(); + Known.Zero.lshrInPlace(Shift); + Known.One.lshrInPlace(Shift); + // High bits are known zero. + Known.Zero.setHighBits(Shift); + } else if (const APInt *ShMinAmt = getValidMinimumShiftAmountConstant(Op)) { + // Minimum shift high bits are known zero. + Known.Zero.setHighBits(ShMinAmt->getZExtValue()); + } + break; + case ISD::SRA: + if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + unsigned Shift = ShAmt->getZExtValue(); + // Sign extend known zero/one bit (else is unknown). + Known.Zero.ashrInPlace(Shift); + Known.One.ashrInPlace(Shift); + } + break; + case ISD::FSHL: + case ISD::FSHR: + if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) { + unsigned Amt = C->getAPIntValue().urem(BitWidth); + + // For fshl, 0-shift returns the 1st arg. + // For fshr, 0-shift returns the 2nd arg. + if (Amt == 0) { + Known = computeKnownBits(Op.getOperand(Opcode == ISD::FSHL ? 
0 : 1), + DemandedElts, Depth + 1); + break; + } + + // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) + // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + if (Opcode == ISD::FSHL) { + Known.One <<= Amt; + Known.Zero <<= Amt; + Known2.One.lshrInPlace(BitWidth - Amt); + Known2.Zero.lshrInPlace(BitWidth - Amt); + } else { + Known.One <<= BitWidth - Amt; + Known.Zero <<= BitWidth - Amt; + Known2.One.lshrInPlace(Amt); + Known2.Zero.lshrInPlace(Amt); + } + Known.One |= Known2.One; + Known.Zero |= Known2.Zero; + } + break; + case ISD::SIGN_EXTEND_INREG: { + EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + unsigned EBits = EVT.getScalarSizeInBits(); + + // Sign extension. Compute the demanded bits in the result that are not + // present in the input. + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits); + + APInt InSignMask = APInt::getSignMask(EBits); + APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits); + + // If the sign extended bits are demanded, we know that the sign + // bit is demanded. + InSignMask = InSignMask.zext(BitWidth); + if (NewBits.getBoolValue()) + InputDemandedBits |= InSignMask; + + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known.One &= InputDemandedBits; + Known.Zero &= InputDemandedBits; + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + if (Known.Zero.intersects(InSignMask)) { // Input sign bit known clear + Known.Zero |= NewBits; + Known.One &= ~NewBits; + } else if (Known.One.intersects(InSignMask)) { // Input sign bit known set + Known.One |= NewBits; + Known.Zero &= ~NewBits; + } else { // Input sign bit unknown + Known.Zero &= ~NewBits; + Known.One &= ~NewBits; + } + break; + } + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: { + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + // If we have a known 1, its position is our upper bound. + unsigned PossibleTZ = Known2.countMaxTrailingZeros(); + unsigned LowBits = Log2_32(PossibleTZ) + 1; + Known.Zero.setBitsFrom(LowBits); + break; + } + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: { + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + // If we have a known 1, its position is our upper bound. + unsigned PossibleLZ = Known2.countMaxLeadingZeros(); + unsigned LowBits = Log2_32(PossibleLZ) + 1; + Known.Zero.setBitsFrom(LowBits); + break; + } + case ISD::CTPOP: { + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + // If we know some of the bits are zero, they can't be one. + unsigned PossibleOnes = Known2.countMaxPopulation(); + Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1); + break; + } + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); + const Constant *Cst = TLI->getTargetConstantFromLoad(LD); + if (ISD::isNON_EXTLoad(LD) && Cst) { + // Determine any common known bits from the loaded constant pool value. + Type *CstTy = Cst->getType(); + if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) { + // If its a vector splat, then we can (quickly) reuse the scalar path. + // NOTE: We assume all elements match and none are UNDEF. + if (CstTy->isVectorTy()) { + if (const Constant *Splat = Cst->getSplatValue()) { + Cst = Splat; + CstTy = Cst->getType(); + } + } + // TODO - do we need to handle different bitwidths? 
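+        // e.g. loading <1, 3, 5, 7> as v4i32: every element is odd and below
+        // 8, so across the demanded lanes bit 0 is known one and bits 3 and
+        // above are known zero.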
+ if (CstTy->isVectorTy() && BitWidth == CstTy->getScalarSizeInBits()) { + // Iterate across all vector elements finding common known bits. + Known.One.setAllBits(); + Known.Zero.setAllBits(); + for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i]) + continue; + if (Constant *Elt = Cst->getAggregateElement(i)) { + if (auto *CInt = dyn_cast<ConstantInt>(Elt)) { + const APInt &Value = CInt->getValue(); + Known.One &= Value; + Known.Zero &= ~Value; + continue; + } + if (auto *CFP = dyn_cast<ConstantFP>(Elt)) { + APInt Value = CFP->getValueAPF().bitcastToAPInt(); + Known.One &= Value; + Known.Zero &= ~Value; + continue; + } + } + Known.One.clearAllBits(); + Known.Zero.clearAllBits(); + break; + } + } else if (BitWidth == CstTy->getPrimitiveSizeInBits()) { + if (auto *CInt = dyn_cast<ConstantInt>(Cst)) { + const APInt &Value = CInt->getValue(); + Known.One = Value; + Known.Zero = ~Value; + } else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) { + APInt Value = CFP->getValueAPF().bitcastToAPInt(); + Known.One = Value; + Known.Zero = ~Value; + } + } + } + } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { + // If this is a ZEXTLoad and we are looking at the loaded value. + EVT VT = LD->getMemoryVT(); + unsigned MemBits = VT.getScalarSizeInBits(); + Known.Zero.setBitsFrom(MemBits); + } else if (const MDNode *Ranges = LD->getRanges()) { + if (LD->getExtensionType() == ISD::NON_EXTLOAD) + computeKnownBitsFromRangeMetadata(*Ranges, Known); + } + break; + } + case ISD::ZERO_EXTEND_VECTOR_INREG: { + EVT InVT = Op.getOperand(0).getValueType(); + APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); + Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); + Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); + break; + } + case ISD::ZERO_EXTEND: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); + break; + } + case ISD::SIGN_EXTEND_VECTOR_INREG: { + EVT InVT = Op.getOperand(0).getValueType(); + APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); + Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); + // If the sign bit is known to be zero or one, then sext will extend + // it to the top bits, else it will just zext. + Known = Known.sext(BitWidth); + break; + } + case ISD::SIGN_EXTEND: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + // If the sign bit is known to be zero or one, then sext will extend + // it to the top bits, else it will just zext. + Known = Known.sext(BitWidth); + break; + } + case ISD::ANY_EXTEND: { + Known = computeKnownBits(Op.getOperand(0), Depth+1); + Known = Known.zext(BitWidth, false /* ExtendedBitsAreKnownZero */); + break; + } + case ISD::TRUNCATE: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known = Known.trunc(BitWidth); + break; + } + case ISD::AssertZext: { + EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); + Known = computeKnownBits(Op.getOperand(0), Depth+1); + Known.Zero |= (~InMask); + Known.One &= (~Known.Zero); + break; + } + case ISD::FGETSIGN: + // All bits are zero except the low bit. + Known.Zero.setBitsFrom(1); + break; + case ISD::USUBO: + case ISD::SSUBO: + if (Op.getResNo() == 1) { + // If we know the result of a setcc has the top bits zero, use this info. 
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + Known.Zero.setBitsFrom(1); + break; + } + LLVM_FALLTHROUGH; + case ISD::SUB: + case ISD::SUBC: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false, + Known, Known2); + break; + } + case ISD::UADDO: + case ISD::SADDO: + case ISD::ADDCARRY: + if (Op.getResNo() == 1) { + // If we know the result of a setcc has the top bits zero, use this info. + if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + Known.Zero.setBitsFrom(1); + break; + } + LLVM_FALLTHROUGH; + case ISD::ADD: + case ISD::ADDC: + case ISD::ADDE: { + assert(Op.getResNo() == 0 && "We only compute knownbits for the sum here."); + + // With ADDE and ADDCARRY, a carry bit may be added in. + KnownBits Carry(1); + if (Opcode == ISD::ADDE) + // Can't track carry from glue, set carry to unknown. + Carry.resetAll(); + else if (Opcode == ISD::ADDCARRY) + // TODO: Compute known bits for the carry operand. Not sure if it is worth + // the trouble (how often will we find a known carry bit). And I haven't + // tested this very much yet, but something like this might work: + // Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); + // Carry = Carry.zextOrTrunc(1, false); + Carry.resetAll(); + else + Carry.setAllZero(); + + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::computeForAddCarry(Known, Known2, Carry); + break; + } + case ISD::SREM: + if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) { + const APInt &RA = Rem->getAPIntValue().abs(); + if (RA.isPowerOf2()) { + APInt LowBits = RA - 1; + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + // The low bits of the first operand are unchanged by the srem. + Known.Zero = Known2.Zero & LowBits; + Known.One = Known2.One & LowBits; + + // If the first operand is non-negative or has all low bits zero, then + // the upper bits are all zero. + if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero)) + Known.Zero |= ~LowBits; + + // If the first operand is negative and not all low bits are zero, then + // the upper bits are all one. + if (Known2.isNegative() && LowBits.intersects(Known2.One)) + Known.One |= ~LowBits; + assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?"); + } + } + break; + case ISD::UREM: { + if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) { + const APInt &RA = Rem->getAPIntValue(); + if (RA.isPowerOf2()) { + APInt LowBits = (RA - 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + // The upper bits are all zero, the lower ones are unchanged. + Known.Zero = Known2.Zero | ~LowBits; + Known.One = Known2.One & LowBits; + break; + } + } + + // Since the result is less than or equal to either operand, any leading + // zero bits in either operand must also exist in the result. 
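+    // e.g. in i32, if either operand is known to be < 2^20 then so is
+    // X urem Y, since the result never exceeds either input; those 12
+    // leading zero bits carry over.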
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + uint32_t Leaders = + std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros()); + Known.resetAll(); + Known.Zero.setHighBits(Leaders); + break; + } + case ISD::EXTRACT_ELEMENT: { + Known = computeKnownBits(Op.getOperand(0), Depth+1); + const unsigned Index = Op.getConstantOperandVal(1); + const unsigned EltBitWidth = Op.getValueSizeInBits(); + + // Remove low part of known bits mask + Known.Zero = Known.Zero.getHiBits(Known.getBitWidth() - Index * EltBitWidth); + Known.One = Known.One.getHiBits(Known.getBitWidth() - Index * EltBitWidth); + + // Remove high part of known bit mask + Known = Known.trunc(EltBitWidth); + break; + } + case ISD::EXTRACT_VECTOR_ELT: { + SDValue InVec = Op.getOperand(0); + SDValue EltNo = Op.getOperand(1); + EVT VecVT = InVec.getValueType(); + const unsigned EltBitWidth = VecVT.getScalarSizeInBits(); + const unsigned NumSrcElts = VecVT.getVectorNumElements(); + // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know + // anything about the extended bits. + if (BitWidth > EltBitWidth) + Known = Known.trunc(EltBitWidth); + ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); + if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) { + // If we know the element index, just demand that vector element. + unsigned Idx = ConstEltNo->getZExtValue(); + APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx); + Known = computeKnownBits(InVec, DemandedElt, Depth + 1); + } else { + // Unknown element index, so ignore DemandedElts and demand them all. + Known = computeKnownBits(InVec, Depth + 1); + } + if (BitWidth > EltBitWidth) + Known = Known.zext(BitWidth, false /* => any extend */); + break; + } + case ISD::INSERT_VECTOR_ELT: { + SDValue InVec = Op.getOperand(0); + SDValue InVal = Op.getOperand(1); + SDValue EltNo = Op.getOperand(2); + + ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo); + if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { + // If we know the element index, split the demand between the + // source vector and the inserted element. + Known.Zero = Known.One = APInt::getAllOnesValue(BitWidth); + unsigned EltIdx = CEltNo->getZExtValue(); + + // If we demand the inserted element then add its common known bits. + if (DemandedElts[EltIdx]) { + Known2 = computeKnownBits(InVal, Depth + 1); + Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); + Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth()); + } + + // If we demand the source vector then add its common known bits, ensuring + // that we don't demand the inserted element. + APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx)); + if (!!VectorElts) { + Known2 = computeKnownBits(InVec, VectorElts, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + } + } else { + // Unknown element index, so ignore DemandedElts and demand them all. 
+ Known = computeKnownBits(InVec, Depth + 1); + Known2 = computeKnownBits(InVal, Depth + 1); + Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); + Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth()); + } + break; + } + case ISD::BITREVERSE: { + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known.Zero = Known2.Zero.reverseBits(); + Known.One = Known2.One.reverseBits(); + break; + } + case ISD::BSWAP: { + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known.Zero = Known2.Zero.byteSwap(); + Known.One = Known2.One.byteSwap(); + break; + } + case ISD::ABS: { + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + // If the source's MSB is zero then we know the rest of the bits already. + if (Known2.isNonNegative()) { + Known.Zero = Known2.Zero; + Known.One = Known2.One; + break; + } + + // We only know that the absolute values's MSB will be zero iff there is + // a set bit that isn't the sign bit (otherwise it could be INT_MIN). + Known2.One.clearSignBit(); + if (Known2.One.getBoolValue()) { + Known.Zero = APInt::getSignMask(BitWidth); + break; + } + break; + } + case ISD::UMIN: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // UMIN - we know that the result will have the maximum of the + // known zero leading bits of the inputs. + unsigned LeadZero = Known.countMinLeadingZeros(); + LeadZero = std::max(LeadZero, Known2.countMinLeadingZeros()); + + Known.Zero &= Known2.Zero; + Known.One &= Known2.One; + Known.Zero.setHighBits(LeadZero); + break; + } + case ISD::UMAX: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // UMAX - we know that the result will have the maximum of the + // known one leading bits of the inputs. + unsigned LeadOne = Known.countMinLeadingOnes(); + LeadOne = std::max(LeadOne, Known2.countMinLeadingOnes()); + + Known.Zero &= Known2.Zero; + Known.One &= Known2.One; + Known.One.setHighBits(LeadOne); + break; + } + case ISD::SMIN: + case ISD::SMAX: { + // If we have a clamp pattern, we know that the number of sign bits will be + // the minimum of the clamp min/max range. + bool IsMax = (Opcode == ISD::SMAX); + ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; + if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts))) + if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) + CstHigh = + isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts); + if (CstLow && CstHigh) { + if (!IsMax) + std::swap(CstLow, CstHigh); + + const APInt &ValueLow = CstLow->getAPIntValue(); + const APInt &ValueHigh = CstHigh->getAPIntValue(); + if (ValueLow.sle(ValueHigh)) { + unsigned LowSignBits = ValueLow.getNumSignBits(); + unsigned HighSignBits = ValueHigh.getNumSignBits(); + unsigned MinSignBits = std::min(LowSignBits, HighSignBits); + if (ValueLow.isNegative() && ValueHigh.isNegative()) { + Known.One.setHighBits(MinSignBits); + break; + } + if (ValueLow.isNonNegative() && ValueHigh.isNonNegative()) { + Known.Zero.setHighBits(MinSignBits); + break; + } + } + } + + // Fallback - just get the shared known bits of the operands. 
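+    // Whichever operand the min/max selects, any bit on which both inputs
+    // agree is valid for the result (e.g. if both inputs are even, so is
+    // the result).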
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (Known.isUnknown()) break; // Early-out + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known.Zero &= Known2.Zero; + Known.One &= Known2.One; + break; + } + case ISD::FrameIndex: + case ISD::TargetFrameIndex: + TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth); + break; + + default: + if (Opcode < ISD::BUILTIN_OP_END) + break; + LLVM_FALLTHROUGH; + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_VOID: + // Allow the target to implement this method for its nodes. + TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth); + break; + } + + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + return Known; +} + +SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, + SDValue N1) const { + // X + 0 never overflows. + if (isNullConstant(N1)) + return OFK_Never; + + KnownBits N1Known = computeKnownBits(N1); + if (N1Known.Zero.getBoolValue()) { + KnownBits N0Known = computeKnownBits(N0); + + bool overflow; + (void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow); + if (!overflow) + return OFK_Never; + } + + // mulhi + 1 never overflows. + if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 && + (~N1Known.Zero & 0x01) == ~N1Known.Zero) + return OFK_Never; + + if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) { + KnownBits N0Known = computeKnownBits(N0); + + if ((~N0Known.Zero & 0x01) == ~N0Known.Zero) + return OFK_Never; + } + + return OFK_Sometime; +} + +bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { + EVT OpVT = Val.getValueType(); + unsigned BitWidth = OpVT.getScalarSizeInBits(); + + // Is the constant a known power of 2? + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val)) + return Const->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2(); + + // A left-shift of a constant one will have exactly one bit set because + // shifting the bit off the end is undefined. + if (Val.getOpcode() == ISD::SHL) { + auto *C = isConstOrConstSplat(Val.getOperand(0)); + if (C && C->getAPIntValue() == 1) + return true; + } + + // Similarly, a logical right-shift of a constant sign-bit will have exactly + // one bit set. + if (Val.getOpcode() == ISD::SRL) { + auto *C = isConstOrConstSplat(Val.getOperand(0)); + if (C && C->getAPIntValue().isSignMask()) + return true; + } + + // Are all operands of a build vector constant powers of two? + if (Val.getOpcode() == ISD::BUILD_VECTOR) + if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E)) + return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2(); + return false; + })) + return true; + + // More could be done here, though the above checks are enough + // to handle some common cases. + + // Fall back to computeKnownBits to catch other known cases. + KnownBits Known = computeKnownBits(Val); + return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1); +} + +unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { + EVT VT = Op.getValueType(); + APInt DemandedElts = VT.isVector() + ?
APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + return ComputeNumSignBits(Op, DemandedElts, Depth); +} + +unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, + unsigned Depth) const { + EVT VT = Op.getValueType(); + assert((VT.isInteger() || VT.isFloatingPoint()) && "Invalid VT!"); + unsigned VTBits = VT.getScalarSizeInBits(); + unsigned NumElts = DemandedElts.getBitWidth(); + unsigned Tmp, Tmp2; + unsigned FirstAnswer = 1; + + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { + const APInt &Val = C->getAPIntValue(); + return Val.getNumSignBits(); + } + + if (Depth >= MaxRecursionDepth) + return 1; // Limit search depth. + + if (!DemandedElts) + return 1; // No demanded elts, better to assume we don't know anything. + + unsigned Opcode = Op.getOpcode(); + switch (Opcode) { + default: break; + case ISD::AssertSext: + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::AssertZext: + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + return VTBits-Tmp; + + case ISD::BUILD_VECTOR: + Tmp = VTBits; + for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) { + if (!DemandedElts[i]) + continue; + + SDValue SrcOp = Op.getOperand(i); + Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1); + + // BUILD_VECTOR can implicitly truncate sources, we must handle this. + if (SrcOp.getValueSizeInBits() != VTBits) { + assert(SrcOp.getValueSizeInBits() > VTBits && + "Expected BUILD_VECTOR implicit truncation"); + unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits; + Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits : 1); + } + Tmp = std::min(Tmp, Tmp2); + } + return Tmp; + + case ISD::VECTOR_SHUFFLE: { + // Collect the minimum number of sign bits that are shared by every vector + // element referenced by the shuffle. + APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); + assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); + for (unsigned i = 0; i != NumElts; ++i) { + int M = SVN->getMaskElt(i); + if (!DemandedElts[i]) + continue; + // For UNDEF elements, we don't know anything about the common state of + // the shuffle result. + if (M < 0) + return 1; + if ((unsigned)M < NumElts) + DemandedLHS.setBit((unsigned)M % NumElts); + else + DemandedRHS.setBit((unsigned)M % NumElts); + } + Tmp = std::numeric_limits<unsigned>::max(); + if (!!DemandedLHS) + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1); + if (!!DemandedRHS) { + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + // If we don't know anything, early out and try computeKnownBits fall-back. + if (Tmp == 1) + break; + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); + return Tmp; + } + + case ISD::BITCAST: { + SDValue N0 = Op.getOperand(0); + EVT SrcVT = N0.getValueType(); + unsigned SrcBits = SrcVT.getScalarSizeInBits(); + + // Ignore bitcasts from unsupported types.. + if (!(SrcVT.isInteger() || SrcVT.isFloatingPoint())) + break; + + // Fast handling of 'identity' bitcasts. + if (VTBits == SrcBits) + return ComputeNumSignBits(N0, DemandedElts, Depth + 1); + + bool IsLE = getDataLayout().isLittleEndian(); + + // Bitcast 'large element' scalar/vector to 'small element' vector. 
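As a concrete little-endian instance of the 'large element to small element' split handled below (a standalone sketch, independent of the LLVM APIs): an i64 that is a sign-extended 24-bit quantity, viewed as v2i32, yields a high element that is entirely sign copies and a low element that still has at least 8 leading sign copies, which is exactly what the SubOffset arithmetic computes with Scale = 2 and VTBits = 32.

#include <cassert>
#include <cstdint>

int main() {
  // An i64 whose top 40+ bits are sign copies (sign-extended from i24).
  int64_t Wide = -0x123456;
  // Little-endian bitcast to v2i32: element 0 is the low half.
  uint32_t Lo = (uint32_t)((uint64_t)Wide & 0xFFFFFFFFu);
  uint32_t Hi = (uint32_t)((uint64_t)Wide >> 32);
  // Element 1 (SubOffset 0) keeps min(32, Tmp) sign bits; element 0
  // (SubOffset 32) keeps Tmp - 32 of them.
  assert(Hi == 0xFFFFFFFFu);   // all 32 bits are sign copies
  assert((Lo >> 24) == 0xFFu); // at least 8 high sign copies survive
  return 0;
}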
+ if ((SrcBits % VTBits) == 0) { + assert(VT.isVector() && "Expected bitcast to vector"); + + unsigned Scale = SrcBits / VTBits; + APInt SrcDemandedElts(NumElts / Scale, 0); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) + SrcDemandedElts.setBit(i / Scale); + + // Fast case - sign splat can be simply split across the small elements. + Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1); + if (Tmp == SrcBits) + return VTBits; + + // Slow case - determine how far the sign extends into each sub-element. + Tmp2 = VTBits; + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) { + unsigned SubOffset = i % Scale; + SubOffset = (IsLE ? ((Scale - 1) - SubOffset) : SubOffset); + SubOffset = SubOffset * VTBits; + if (Tmp <= SubOffset) + return 1; + Tmp2 = std::min(Tmp2, Tmp - SubOffset); + } + return Tmp2; + } + break; + } + + case ISD::SIGN_EXTEND: + Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits(); + return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1) + Tmp; + case ISD::SIGN_EXTEND_INREG: + // Max of the input and what this extends. + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits(); + Tmp = VTBits-Tmp+1; + Tmp2 = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); + return std::max(Tmp, Tmp2); + case ISD::SIGN_EXTEND_VECTOR_INREG: { + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + APInt DemandedSrcElts = DemandedElts.zextOrSelf(SrcVT.getVectorNumElements()); + Tmp = VTBits - SrcVT.getScalarSizeInBits(); + return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp; + } + + case ISD::SRA: + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); + // SRA X, C -> adds C sign bits. + if (ConstantSDNode *C = + isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { + APInt ShiftVal = C->getAPIntValue(); + ShiftVal += Tmp; + Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue(); + } + return Tmp; + case ISD::SHL: + if (ConstantSDNode *C = + isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); + if (C->getAPIntValue().uge(VTBits) || // Bad shift. + C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out. + return Tmp - C->getZExtValue(); + } + break; + case ISD::AND: + case ISD::OR: + case ISD::XOR: // NOT is handled here. + // Logical binary ops preserve the number of sign bits at the worst. + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); + if (Tmp != 1) { + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1); + FirstAnswer = std::min(Tmp, Tmp2); + // We computed what we know about the sign bits as our first + // answer. Now proceed to the generic code that uses + // computeKnownBits, and pick whichever answer is better. + } + break; + + case ISD::SELECT: + case ISD::VSELECT: + Tmp = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1); + return std::min(Tmp, Tmp2); + case ISD::SELECT_CC: + Tmp = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth+1); + return std::min(Tmp, Tmp2); + + case ISD::SMIN: + case ISD::SMAX: { + // If we have a clamp pattern, we know that the number of sign bits will be + // the minimum of the clamp min/max range. 
+ bool IsMax = (Opcode == ISD::SMAX); + ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; + if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts))) + if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) + CstHigh = + isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts); + if (CstLow && CstHigh) { + if (!IsMax) + std::swap(CstLow, CstHigh); + if (CstLow->getAPIntValue().sle(CstHigh->getAPIntValue())) { + Tmp = CstLow->getAPIntValue().getNumSignBits(); + Tmp2 = CstHigh->getAPIntValue().getNumSignBits(); + return std::min(Tmp, Tmp2); + } + } + + // Fallback - just get the minimum number of sign bits of the operands. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (Tmp == 1) + return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + return std::min(Tmp, Tmp2); + } + case ISD::UMIN: + case ISD::UMAX: + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (Tmp == 1) + return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + return std::min(Tmp, Tmp2); + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + if (Op.getResNo() != 1) + break; + // The boolean result conforms to getBooleanContents. Fall through. + // If setcc returns 0/-1, all bits are sign bits. + // We know that we have an integer-based boolean since these operations + // are only available for integer. + if (TLI->getBooleanContents(VT.isVector(), false) == + TargetLowering::ZeroOrNegativeOneBooleanContent) + return VTBits; + break; + case ISD::SETCC: + // If setcc returns 0/-1, all bits are sign bits. + if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + TargetLowering::ZeroOrNegativeOneBooleanContent) + return VTBits; + break; + case ISD::ROTL: + case ISD::ROTR: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned RotAmt = C->getAPIntValue().urem(VTBits); + + // Handle rotate right by N like a rotate left by 32-N. + if (Opcode == ISD::ROTR) + RotAmt = (VTBits - RotAmt) % VTBits; + + // If we aren't rotating out all of the known-in sign bits, return the + // number that are left. This handles rotl(sext(x), 1) for example. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt); + } + break; + case ISD::ADD: + case ISD::ADDC: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + + // Special case decrementing a value (ADD X, -1): + if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + if (CRHS->isAllOnesValue()) { + KnownBits Known = computeKnownBits(Op.getOperand(0), Depth+1); + + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((Known.Zero | 1).isAllOnesValue()) + return VTBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (Known.isNonNegative()) + return Tmp; + } + + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + return std::min(Tmp, Tmp2)-1; + + case ISD::SUB: + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + + // Handle NEG. 
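The 'at most one carry bit' rule for ADD/SUB can be checked numerically (standalone sketch; numSignBits is a hypothetical helper mirroring APInt::getNumSignBits):

#include <cassert>
#include <cstdint>

static unsigned numSignBits(uint32_t V) {
  unsigned Sign = V >> 31, N = 1;
  while (N < 32 && ((V >> (31 - N)) & 1) == Sign)
    ++N;
  return N;
}

int main() {
  uint32_t A = 4000, B = (uint32_t)-3000;
  // Both inputs fit in 13 signed bits, so each has 20 sign bits; the sum is
  // therefore guaranteed at least min(20, 20) - 1 = 19 sign bits.
  assert(numSignBits(A) == 20 && numSignBits(B) == 20);
  assert(numSignBits(A + B) >= 19);
  return 0;
}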
+ if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) + if (CLHS->isNullValue()) { + KnownBits Known = computeKnownBits(Op.getOperand(1), Depth+1); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((Known.Zero | 1).isAllOnesValue()) + return VTBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has the same number of sign bits as the input. + if (Known.isNonNegative()) + return Tmp2; + + // Otherwise, we treat this like a SUB. + } + + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + return std::min(Tmp, Tmp2)-1; + case ISD::MUL: { + // The output of the Mul can be at most twice the valid bits in the inputs. + unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (SignBitsOp0 == 1) + break; + unsigned SignBitsOp1 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + if (SignBitsOp1 == 1) + break; + unsigned OutValidBits = + (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1); + return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1; + } + case ISD::TRUNCATE: { + // Check if the sign bits of source go down as far as the truncated value. + unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits(); + unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (NumSrcSignBits > (NumSrcBits - VTBits)) + return NumSrcSignBits - (NumSrcBits - VTBits); + break; + } + case ISD::EXTRACT_ELEMENT: { + const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1); + const int BitWidth = Op.getValueSizeInBits(); + const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth; + + // Get reverse index (starting from 1), Op1 value indexes elements from + // little end. Sign starts at big end. + const int rIndex = Items - 1 - Op.getConstantOperandVal(1); + + // If the sign portion ends in our element the subtraction gives correct + // result. Otherwise it gives either negative or > bitwidth result + return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); + } + case ISD::INSERT_VECTOR_ELT: { + SDValue InVec = Op.getOperand(0); + SDValue InVal = Op.getOperand(1); + SDValue EltNo = Op.getOperand(2); + + ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo); + if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { + // If we know the element index, split the demand between the + // source vector and the inserted element. + unsigned EltIdx = CEltNo->getZExtValue(); + + // If we demand the inserted element then get its sign bits. + Tmp = std::numeric_limits<unsigned>::max(); + if (DemandedElts[EltIdx]) { + // TODO - handle implicit truncation of inserted elements. + if (InVal.getScalarValueSizeInBits() != VTBits) + break; + Tmp = ComputeNumSignBits(InVal, Depth + 1); + } + + // If we demand the source vector then get its sign bits, and determine + // the minimum. + APInt VectorElts = DemandedElts; + VectorElts.clearBit(EltIdx); + if (!!VectorElts) { + Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + } else { + // Unknown element index, so ignore DemandedElts and demand them all. 
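The ISD::MUL rule above is easy to sanity-check on concrete numbers: two i32 factors with 20 sign bits each have 32 - 20 + 1 = 13 valid bits each, so the product has at most 26 valid bits and hence at least 32 - 26 + 1 = 7 sign bits (standalone sketch, same hypothetical numSignBits helper as before):

#include <cassert>
#include <cstdint>

static unsigned numSignBits(uint32_t V) {
  unsigned Sign = V >> 31, N = 1;
  while (N < 32 && ((V >> (31 - N)) & 1) == Sign)
    ++N;
  return N;
}

int main() {
  // OutValidBits = (32 - 20 + 1) + (32 - 20 + 1) = 26, so the product is
  // guaranteed 32 - 26 + 1 = 7 sign bits; this product actually has 8.
  uint32_t A = 4000, B = (uint32_t)-3000;
  assert(numSignBits(A) == 20 && numSignBits(B) == 20);
  assert(numSignBits(A * B) >= 7);
  return 0;
}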
+ Tmp = ComputeNumSignBits(InVec, Depth + 1); + Tmp2 = ComputeNumSignBits(InVal, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); + return Tmp; + } + case ISD::EXTRACT_VECTOR_ELT: { + SDValue InVec = Op.getOperand(0); + SDValue EltNo = Op.getOperand(1); + EVT VecVT = InVec.getValueType(); + const unsigned BitWidth = Op.getValueSizeInBits(); + const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); + const unsigned NumSrcElts = VecVT.getVectorNumElements(); + + // If BitWidth > EltBitWidth the value is anyext:ed, and we do not know + // anything about sign bits. But if the sizes match we can derive knowledge + // about sign bits from the vector operand. + if (BitWidth != EltBitWidth) + break; + + // If we know the element index, just demand that vector element, else for + // an unknown element index, ignore DemandedElts and demand them all. + APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); + ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); + if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) + DemandedSrcElts = + APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue()); + + return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1); + } + case ISD::EXTRACT_SUBVECTOR: { + // If we know the element index, just demand that subvector elements, + // otherwise demand them all. + SDValue Src = Op.getOperand(0); + ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); + if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { + // Offset the demanded elts by the subvector index. + uint64_t Idx = SubIdx->getZExtValue(); + DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + } + return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); + } + case ISD::CONCAT_VECTORS: { + // Determine the minimum number of sign bits across all demanded + // elts of the input vectors. Early out if the result is already 1. + Tmp = std::numeric_limits<unsigned>::max(); + EVT SubVectorVT = Op.getOperand(0).getValueType(); + unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); + unsigned NumSubVectors = Op.getNumOperands(); + for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1); ++i) { + APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts); + DemandedSub = DemandedSub.trunc(NumSubVectorElts); + if (!DemandedSub) + continue; + Tmp2 = ComputeNumSignBits(Op.getOperand(i), DemandedSub, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); + return Tmp; + } + case ISD::INSERT_SUBVECTOR: { + // If we know the element index, demand any elements from the subvector and + // the remainder from the src its inserted into, otherwise demand them all. 
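The demand-splitting described in that comment is plain mask arithmetic; a standalone sketch with a 4-element vector and a 2-element subvector inserted at index 2, using bitmasks to stand in for the APInt sets:

#include <cassert>
#include <cstdint>

int main() {
  // Bit i set = element i is demanded. Insert a 2-element subvector at
  // index 2 of a 4-element vector.
  unsigned NumSubElts = 2, Idx = 2;
  uint32_t DemandedElts = 0b1010u; // caller demands elements 1 and 3
  // Elements [Idx, Idx + NumSubElts) come from the subvector.
  uint32_t SubMask = ((1u << NumSubElts) - 1) << Idx;         // 0b1100
  uint32_t DemandedSubElts = (DemandedElts & SubMask) >> Idx; // extractBits
  uint32_t DemandedSrcElts = DemandedElts & ~SubMask;         // remainder
  assert(DemandedSubElts == 0b10u);   // subvector lane 1 (vector lane 3)
  assert(DemandedSrcElts == 0b0010u); // source lane 1
  return 0;
}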
+ SDValue Src = Op.getOperand(0); + SDValue Sub = Op.getOperand(1); + auto *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); + if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) { + Tmp = std::numeric_limits<unsigned>::max(); + uint64_t Idx = SubIdx->getZExtValue(); + APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); + if (!!DemandedSubElts) { + Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1); + if (Tmp == 1) return 1; // early-out + } + APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts); + APInt DemandedSrcElts = DemandedElts & ~SubMask; + if (!!DemandedSrcElts) { + Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); + return Tmp; + } + + // Not able to determine the index so just assume worst case. + Tmp = ComputeNumSignBits(Sub, Depth + 1); + if (Tmp == 1) return 1; // early-out + Tmp2 = ComputeNumSignBits(Src, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); + return Tmp; + } + } + + // If we are looking at the loaded value of the SDNode. + if (Op.getResNo() == 0) { + // Handle LOADX separately here. EXTLOAD case will fallthrough. + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { + unsigned ExtType = LD->getExtensionType(); + switch (ExtType) { + default: break; + case ISD::SEXTLOAD: // e.g. i16->i32 = '17' bits known. + Tmp = LD->getMemoryVT().getScalarSizeInBits(); + return VTBits - Tmp + 1; + case ISD::ZEXTLOAD: // e.g. i16->i32 = '16' bits known. + Tmp = LD->getMemoryVT().getScalarSizeInBits(); + return VTBits - Tmp; + case ISD::NON_EXTLOAD: + if (const Constant *Cst = TLI->getTargetConstantFromLoad(LD)) { + // We only need to handle vectors - computeKnownBits should handle + // scalar cases. + Type *CstTy = Cst->getType(); + if (CstTy->isVectorTy() && + (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits()) { + Tmp = VTBits; + for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i]) + continue; + if (Constant *Elt = Cst->getAggregateElement(i)) { + if (auto *CInt = dyn_cast<ConstantInt>(Elt)) { + const APInt &Value = CInt->getValue(); + Tmp = std::min(Tmp, Value.getNumSignBits()); + continue; + } + if (auto *CFP = dyn_cast<ConstantFP>(Elt)) { + APInt Value = CFP->getValueAPF().bitcastToAPInt(); + Tmp = std::min(Tmp, Value.getNumSignBits()); + continue; + } + } + // Unknown type. Conservatively assume no bits match sign bit. + return 1; + } + return Tmp; + } + } + break; + } + } + } + + // Allow the target to implement this method for its nodes. + if (Opcode >= ISD::BUILTIN_OP_END || + Opcode == ISD::INTRINSIC_WO_CHAIN || + Opcode == ISD::INTRINSIC_W_CHAIN || + Opcode == ISD::INTRINSIC_VOID) { + unsigned NumBits = + TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth); + if (NumBits > 1) + FirstAnswer = std::max(FirstAnswer, NumBits); + } + + // Finally, if we can prove that the top bits of the result are 0's or 1's, + // use this information. + KnownBits Known = computeKnownBits(Op, DemandedElts, Depth); + + APInt Mask; + if (Known.isNonNegative()) { // sign bit is 0 + Mask = Known.Zero; + } else if (Known.isNegative()) { // sign bit is 1; + Mask = Known.One; + } else { + // Nothing known. + return FirstAnswer; + } + + // Okay, we know that the sign bit in Mask is set. Use CLZ to determine + // the number of identical bits in the top of the input value. 
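In miniature, the mask arithmetic on the next lines looks like this for an 8-bit value whose top three bits are known zero (a standalone sketch, not the APInt code itself; the real code also shifts out the bits above VTBits and clamps with std::min):

#include <cassert>
#include <cstdint>

int main() {
  // Sign bit known zero, so Mask = Known.Zero = 0b11100000: the top three
  // bits are known sign copies.
  uint8_t Mask = 0b11100000;
  uint8_t Inv = (uint8_t)~Mask; // 0b00011111
  unsigned SignBits = 0;
  for (int Bit = 7; Bit >= 0 && !((Inv >> Bit) & 1); --Bit)
    ++SignBits; // count leading zeros of the inverted mask
  assert(SignBits == 3);
  return 0;
}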
+ Mask = ~Mask; + Mask <<= Mask.getBitWidth()-VTBits; + // Return # leading zeros. We use 'min' here in case Val was zero before + // shifting. We don't want to return '64' as for an i32 "0". + return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); +} + +bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { + if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) || + !isa<ConstantSDNode>(Op.getOperand(1))) + return false; + + if (Op.getOpcode() == ISD::OR && + !MaskedValueIsZero(Op.getOperand(0), Op.getConstantOperandAPInt(1))) + return false; + + return true; +} + +bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const { + // If we're told that NaNs won't happen, assume they won't. + if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs()) + return true; + + if (Depth >= MaxRecursionDepth) + return false; // Limit search depth. + + // TODO: Handle vectors. + // If the value is a constant, we can obviously see if it is a NaN or not. + if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) { + return !C->getValueAPF().isNaN() || + (SNaN && !C->getValueAPF().isSignaling()); + } + + unsigned Opcode = Op.getOpcode(); + switch (Opcode) { + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + case ISD::FSIN: + case ISD::FCOS: { + if (SNaN) + return true; + // TODO: Need isKnownNeverInfinity + return false; + } + case ISD::FCANONICALIZE: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FTRUNC: + case ISD::FFLOOR: + case ISD::FCEIL: + case ISD::FROUND: + case ISD::FRINT: + case ISD::FNEARBYINT: { + if (SNaN) + return true; + return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); + } + case ISD::FABS: + case ISD::FNEG: + case ISD::FCOPYSIGN: { + return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); + } + case ISD::SELECT: + return isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && + isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1); + case ISD::FP_EXTEND: + case ISD::FP_ROUND: { + if (SNaN) + return true; + return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); + } + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + return true; + case ISD::FMA: + case ISD::FMAD: { + if (SNaN) + return true; + return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && + isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && + isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1); + } + case ISD::FSQRT: // Need to know the operand is non-negative. + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FPOWI: + case ISD::FPOW: { + if (SNaN) + return true; + // TODO: Refine on operand + return false; + } + case ISD::FMINNUM: + case ISD::FMAXNUM: { + // Only one needs to be known not-NaN, since it will be returned if the + // other ends up being one. + return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) || + isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1); + } + case ISD::FMINNUM_IEEE: + case ISD::FMAXNUM_IEEE: { + if (SNaN) + return true; + // This can return a NaN if either operand is an sNaN, or if both operands + // are NaN. + return (isKnownNeverNaN(Op.getOperand(0), false, Depth + 1) && + isKnownNeverSNaN(Op.getOperand(1), Depth + 1)) || + (isKnownNeverNaN(Op.getOperand(1), false, Depth + 1) && + isKnownNeverSNaN(Op.getOperand(0), Depth + 1)); + } + case ISD::FMINIMUM: + case ISD::FMAXIMUM: { + // TODO: Does this quiet or return the original NaN as-is?
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && + isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1); + } + case ISD::EXTRACT_VECTOR_ELT: { + return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); + } + default: + if (Opcode >= ISD::BUILTIN_OP_END || + Opcode == ISD::INTRINSIC_WO_CHAIN || + Opcode == ISD::INTRINSIC_W_CHAIN || + Opcode == ISD::INTRINSIC_VOID) { + return TLI->isKnownNeverNaNForTargetNode(Op, *this, SNaN, Depth); + } + + return false; + } +} + +bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const { + assert(Op.getValueType().isFloatingPoint() && + "Floating point type expected"); + + // If the value is a constant, we can obviously see if it is a zero or not. + // TODO: Add BuildVector support. + if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) + return !C->isZero(); + return false; +} + +bool SelectionDAG::isKnownNeverZero(SDValue Op) const { + assert(!Op.getValueType().isFloatingPoint() && + "Floating point types unsupported - use isKnownNeverZeroFloat"); + + // If the value is a constant, we can obviously see if it is a zero or not. + if (ISD::matchUnaryPredicate( + Op, [](ConstantSDNode *C) { return !C->isNullValue(); })) + return true; + + // TODO: Recognize more cases here. + switch (Op.getOpcode()) { + default: break; + case ISD::OR: + if (isKnownNeverZero(Op.getOperand(1)) || + isKnownNeverZero(Op.getOperand(0))) + return true; + break; + } + + return false; +} + +bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { + // Check the obvious case. + if (A == B) return true; + + // Check for negative and positive zero. + if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A)) + if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B)) + if (CA->isZero() && CB->isZero()) return true; + + // Otherwise they may not be equal. + return false; +} + +// FIXME: unify with llvm::haveNoCommonBitsSet. +// FIXME: could also handle masked merge pattern (X & ~M) op (Y & M) +bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { + assert(A.getValueType() == B.getValueType() && + "Values must have the same type"); + return (computeKnownBits(A).Zero | computeKnownBits(B).Zero).isAllOnesValue(); +} + +static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT, + ArrayRef<SDValue> Ops, + SelectionDAG &DAG) { + int NumOps = Ops.size(); + assert(NumOps != 0 && "Can't build an empty vector!"); + assert(VT.getVectorNumElements() == (unsigned)NumOps && + "Incorrect element count in BUILD_VECTOR!"); + + // BUILD_VECTOR of UNDEFs is UNDEF. + if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); })) + return DAG.getUNDEF(VT); + + // A BUILD_VECTOR of sequential extracts from the same vector and type is an + // identity (the source vector). + SDValue IdentitySrc; + bool IsIdentity = true; + for (int i = 0; i != NumOps; ++i) { + if (Ops[i].getOpcode() != ISD::EXTRACT_VECTOR_ELT || + Ops[i].getOperand(0).getValueType() != VT || + (IdentitySrc && Ops[i].getOperand(0) != IdentitySrc) || + !isa<ConstantSDNode>(Ops[i].getOperand(1)) || + cast<ConstantSDNode>(Ops[i].getOperand(1))->getAPIntValue() != i) { + IsIdentity = false; + break; + } + IdentitySrc = Ops[i].getOperand(0); + } + if (IsIdentity) + return IdentitySrc; + + return SDValue(); +} + +/// Try to simplify vector concatenation to an input value, undef, or build +/// vector.
+static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, + ArrayRef<SDValue> Ops, + SelectionDAG &DAG) { + assert(!Ops.empty() && "Can't concatenate an empty list of vectors!"); + assert(llvm::all_of(Ops, + [Ops](SDValue Op) { + return Ops[0].getValueType() == Op.getValueType(); + }) && + "Concatenation of vectors with inconsistent value types!"); + assert((Ops.size() * Ops[0].getValueType().getVectorNumElements()) == + VT.getVectorNumElements() && + "Incorrect element count in vector concatenation!"); + + if (Ops.size() == 1) + return Ops[0]; + + // Concat of UNDEFs is UNDEF. + if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); })) + return DAG.getUNDEF(VT); + + // Scan the operands and look for extract operations from a single source + // that correspond to insertion at the same location via this concatenation: + // concat (extract X, 0*subvec_elts), (extract X, 1*subvec_elts), ... + SDValue IdentitySrc; + bool IsIdentity = true; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + SDValue Op = Ops[i]; + unsigned IdentityIndex = i * Op.getValueType().getVectorNumElements(); + if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR || + Op.getOperand(0).getValueType() != VT || + (IdentitySrc && Op.getOperand(0) != IdentitySrc) || + !isa<ConstantSDNode>(Op.getOperand(1)) || + Op.getConstantOperandVal(1) != IdentityIndex) { + IsIdentity = false; + break; + } + assert((!IdentitySrc || IdentitySrc == Op.getOperand(0)) && + "Unexpected identity source vector for concat of extracts"); + IdentitySrc = Op.getOperand(0); + } + if (IsIdentity) { + assert(IdentitySrc && "Failed to set source vector of extracts"); + return IdentitySrc; + } + + // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be + // simplified to one big BUILD_VECTOR. + // FIXME: Add support for SCALAR_TO_VECTOR as well. + EVT SVT = VT.getScalarType(); + SmallVector<SDValue, 16> Elts; + for (SDValue Op : Ops) { + EVT OpVT = Op.getValueType(); + if (Op.isUndef()) + Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT)); + else if (Op.getOpcode() == ISD::BUILD_VECTOR) + Elts.append(Op->op_begin(), Op->op_end()); + else + return SDValue(); + } + + // BUILD_VECTOR requires all inputs to be of the same type, find the + // maximum type and extend them all. + for (SDValue Op : Elts) + SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); + + if (SVT.bitsGT(VT.getScalarType())) + for (SDValue &Op : Elts) + Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT) + ? DAG.getZExtOrTrunc(Op, DL, SVT) + : DAG.getSExtOrTrunc(Op, DL, SVT); + + SDValue V = DAG.getBuildVector(VT, DL, Elts); + NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG); + return V; +} + +/// Gets or creates the specified node. +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, getVTList(VT), None); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), + getVTList(VT)); + CSEMap.InsertNode(N, IP); + + InsertNode(N); + SDValue V = SDValue(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + SDValue Operand, const SDNodeFlags Flags) { + // Constant fold unary operations with an integer constant operand. Even + // opaque constant will be folded, because the folding of unary operations + // doesn't create new constants with different values. 
Nevertheless, the + // opaque flag is preserved during folding to prevent future folding with + // other constants. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand)) { + const APInt &Val = C->getAPIntValue(); + switch (Opcode) { + default: break; + case ISD::SIGN_EXTEND: + return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT, + C->isTargetOpcode(), C->isOpaque()); + case ISD::TRUNCATE: + if (C->isOpaque()) + break; + LLVM_FALLTHROUGH; + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, + C->isTargetOpcode(), C->isOpaque()); + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: { + APFloat apf(EVTToAPFloatSemantics(VT), + APInt::getNullValue(VT.getSizeInBits())); + (void)apf.convertFromAPInt(Val, + Opcode==ISD::SINT_TO_FP, + APFloat::rmNearestTiesToEven); + return getConstantFP(apf, DL, VT); + } + case ISD::BITCAST: + if (VT == MVT::f16 && C->getValueType(0) == MVT::i16) + return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT); + if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) + return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT); + if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) + return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT); + if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) + return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT); + break; + case ISD::ABS: + return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + case ISD::BITREVERSE: + return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + case ISD::BSWAP: + return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + case ISD::CTPOP: + return getConstant(Val.countPopulation(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + return getConstant(Val.countLeadingZeros(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + case ISD::FP16_TO_FP: { + bool Ignored; + APFloat FPV(APFloat::IEEEhalf(), + (Val.getBitWidth() == 16) ? Val : Val.trunc(16)); + + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)FPV.convert(EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &Ignored); + return getConstantFP(FPV, DL, VT); + } + } + } + + // Constant fold unary operations with a floating point constant operand. + if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand)) { + APFloat V = C->getValueAPF(); // make copy + switch (Opcode) { + case ISD::FNEG: + V.changeSign(); + return getConstantFP(V, DL, VT); + case ISD::FABS: + V.clearSign(); + return getConstantFP(V, DL, VT); + case ISD::FCEIL: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, DL, VT); + break; + } + case ISD::FTRUNC: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, DL, VT); + break; + } + case ISD::FFLOOR: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, DL, VT); + break; + } + case ISD::FP_EXTEND: { + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. 
+ // FIXME need to be more flexible about rounding mode. + (void)V.convert(EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, DL, VT); + } + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: { + bool ignored; + APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT); + // FIXME need to be more flexible about rounding mode. + APFloat::opStatus s = + V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored); + if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual + break; + return getConstant(IntVal, DL, VT); + } + case ISD::BITCAST: + if (VT == MVT::i16 && C->getValueType(0) == MVT::f16) + return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT); + else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) + return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT); + else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) + return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); + break; + case ISD::FP_TO_FP16: { + bool Ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(APFloat::IEEEhalf(), + APFloat::rmNearestTiesToEven, &Ignored); + return getConstant(V.bitcastToAPInt(), DL, VT); + } + } + } + + // Constant fold unary operations with a vector integer or float operand. + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand)) { + if (BV->isConstant()) { + switch (Opcode) { + default: + // FIXME: Entirely reasonable to perform folding of other unary + // operations here as the need arises. + break; + case ISD::FNEG: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FTRUNC: + case ISD::FFLOOR: + case ISD::FP_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::TRUNCATE: + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: + case ISD::ABS: + case ISD::BITREVERSE: + case ISD::BSWAP: + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTPOP: { + SDValue Ops = { Operand }; + if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) + return Fold; + } + } + } + } + + unsigned OpOpcode = Operand.getNode()->getOpcode(); + switch (Opcode) { + case ISD::TokenFactor: + case ISD::MERGE_VALUES: + case ISD::CONCAT_VECTORS: + return Operand; // Factor, merge or concat of one node? No need. + case ISD::BUILD_VECTOR: { + // Attempt to simplify BUILD_VECTOR. + SDValue Ops[] = {Operand}; + if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) + return V; + break; + } + case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node"); + case ISD::FP_EXTEND: + assert(VT.isFloatingPoint() && + Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); + if (Operand.getValueType() == VT) return Operand; // noop conversion. + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid fpext node, dst < src!"); + if (Operand.isUndef()) + return getUNDEF(VT); + break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + if (Operand.isUndef()) + return getUNDEF(VT); + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + // [us]itofp(undef) = 0, because the result value is bounded. 
+ if (Operand.isUndef()) + return getConstantFP(0.0, DL, VT); + break; + case ISD::SIGN_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid SIGN_EXTEND!"); + assert(VT.isVector() == Operand.getValueType().isVector() && + "SIGN_EXTEND result type should be vector iff the operand " + "type is vector!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid sext node, dst < src!"); + if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) + return getNode(OpOpcode, DL, VT, Operand.getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + // sext(undef) = 0, because the top bits will all be the same. + return getConstant(0, DL, VT); + break; + case ISD::ZERO_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid ZERO_EXTEND!"); + assert(VT.isVector() == Operand.getValueType().isVector() && + "ZERO_EXTEND result type should be vector iff the operand " + "type is vector!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid zext node, dst < src!"); + if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) + return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + // zext(undef) = 0, because the top bits will be zero. + return getConstant(0, DL, VT); + break; + case ISD::ANY_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid ANY_EXTEND!"); + assert(VT.isVector() == Operand.getValueType().isVector() && + "ANY_EXTEND result type should be vector iff the operand " + "type is vector!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid anyext node, dst < src!"); + + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + OpOpcode == ISD::ANY_EXTEND) + // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) + return getNode(OpOpcode, DL, VT, Operand.getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + + // (ext (trunc x)) -> x + if (OpOpcode == ISD::TRUNCATE) { + SDValue OpOp = Operand.getOperand(0); + if (OpOp.getValueType() == VT) { + transferDbgValues(Operand, OpOp); + return OpOp; + } + } + break; + case ISD::TRUNCATE: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid TRUNCATE!"); + assert(VT.isVector() == Operand.getValueType().isVector() && + "TRUNCATE result type should be vector iff the operand " + "type is vector!"); + if (Operand.getValueType() == VT) return Operand; // noop truncate + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(Operand.getValueType().bitsGT(VT) && + "Invalid truncate node, src < dst!"); + if (OpOpcode == ISD::TRUNCATE) + return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0)); + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + OpOpcode ==
ISD::ANY_EXTEND) { + // If the source is smaller than the dest, we still need an extend. + if (Operand.getOperand(0).getValueType().getScalarType() + .bitsLT(VT.getScalarType())) + return getNode(OpOpcode, DL, VT, Operand.getOperand(0)); + if (Operand.getOperand(0).getValueType().bitsGT(VT)) + return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0)); + return Operand.getOperand(0); + } + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + break; + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(Operand.getValueType().bitsLE(VT) && + "The input must be the same size or smaller than the result."); + assert(VT.getVectorNumElements() < + Operand.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + break; + case ISD::ABS: + assert(VT.isInteger() && VT == Operand.getValueType() && + "Invalid ABS!"); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + break; + case ISD::BSWAP: + assert(VT.isInteger() && VT == Operand.getValueType() && + "Invalid BSWAP!"); + assert((VT.getScalarSizeInBits() % 16 == 0) && + "BSWAP types must be a multiple of 16 bits!"); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + break; + case ISD::BITREVERSE: + assert(VT.isInteger() && VT == Operand.getValueType() && + "Invalid BITREVERSE!"); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + break; + case ISD::BITCAST: + // Basic sanity checking. + assert(VT.getSizeInBits() == Operand.getValueSizeInBits() && + "Cannot BITCAST between types of different sizes!"); + if (VT == Operand.getValueType()) return Operand; // noop conversion. + if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x) + return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0)); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + break; + case ISD::SCALAR_TO_VECTOR: + assert(VT.isVector() && !Operand.getValueType().isVector() && + (VT.getVectorElementType() == Operand.getValueType() || + (VT.getVectorElementType().isInteger() && + Operand.getValueType().isInteger() && + VT.getVectorElementType().bitsLE(Operand.getValueType()))) && + "Illegal SCALAR_TO_VECTOR node!"); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined. + if (OpOpcode == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(Operand.getOperand(1)) && + Operand.getConstantOperandVal(1) == 0 && + Operand.getOperand(0).getValueType() == VT) + return Operand.getOperand(0); + break; + case ISD::FNEG: + // Negation of an unknown bag of bits is still completely undefined. 
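A minimal demonstration of why the FSUB fold above is gated on no-signed-zeros:

#include <cassert>
#include <cmath>

int main() {
  // Why -(X - Y) -> (Y - X) needs no-signed-zeros: with X == Y == 0.0,
  // X - Y is +0.0, so -(X - Y) is -0.0, but Y - X is +0.0.
  double X = 0.0, Y = 0.0;
  assert(std::signbit(-(X - Y)) && !std::signbit(Y - X));
  return 0;
}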
+ if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + + // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 + if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && + OpOpcode == ISD::FSUB) + return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), + Operand.getOperand(0), Flags); + if (OpOpcode == ISD::FNEG) // --X -> X + return Operand.getOperand(0); + break; + case ISD::FABS: + if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X) + return getNode(ISD::FABS, DL, VT, Operand.getOperand(0)); + break; + } + + SDNode *N; + SDVTList VTs = getVTList(VT); + SDValue Ops[] = {Operand}; + if (VT != MVT::Glue) { // Don't CSE flag producing nodes + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { + E->intersectFlagsWith(Flags); + return SDValue(E, 0); + } + + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + N->setFlags(Flags); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); + } else { + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); + } + + InsertNode(N); + SDValue V = SDValue(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, + const APInt &C2) { + switch (Opcode) { + case ISD::ADD: return std::make_pair(C1 + C2, true); + case ISD::SUB: return std::make_pair(C1 - C2, true); + case ISD::MUL: return std::make_pair(C1 * C2, true); + case ISD::AND: return std::make_pair(C1 & C2, true); + case ISD::OR: return std::make_pair(C1 | C2, true); + case ISD::XOR: return std::make_pair(C1 ^ C2, true); + case ISD::SHL: return std::make_pair(C1 << C2, true); + case ISD::SRL: return std::make_pair(C1.lshr(C2), true); + case ISD::SRA: return std::make_pair(C1.ashr(C2), true); + case ISD::ROTL: return std::make_pair(C1.rotl(C2), true); + case ISD::ROTR: return std::make_pair(C1.rotr(C2), true); + case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true); + case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true); + case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true); + case ISD::UMAX: return std::make_pair(C1.uge(C2) ? 
C1 : C2, true); + case ISD::SADDSAT: return std::make_pair(C1.sadd_sat(C2), true); + case ISD::UADDSAT: return std::make_pair(C1.uadd_sat(C2), true); + case ISD::SSUBSAT: return std::make_pair(C1.ssub_sat(C2), true); + case ISD::USUBSAT: return std::make_pair(C1.usub_sat(C2), true); + case ISD::UDIV: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.udiv(C2), true); + case ISD::UREM: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.urem(C2), true); + case ISD::SDIV: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.sdiv(C2), true); + case ISD::SREM: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.srem(C2), true); + } + return std::make_pair(APInt(1, 0), false); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, + EVT VT, const ConstantSDNode *C1, + const ConstantSDNode *C2) { + if (C1->isOpaque() || C2->isOpaque()) + return SDValue(); + + std::pair<APInt, bool> Folded = FoldValue(Opcode, C1->getAPIntValue(), + C2->getAPIntValue()); + if (!Folded.second) + return SDValue(); + return getConstant(Folded.first, DL, VT); +} + +SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, + const GlobalAddressSDNode *GA, + const SDNode *N2) { + if (GA->getOpcode() != ISD::GlobalAddress) + return SDValue(); + if (!TLI->isOffsetFoldingLegal(GA)) + return SDValue(); + auto *C2 = dyn_cast<ConstantSDNode>(N2); + if (!C2) + return SDValue(); + int64_t Offset = C2->getSExtValue(); + switch (Opcode) { + case ISD::ADD: break; + case ISD::SUB: Offset = -uint64_t(Offset); break; + default: return SDValue(); + } + return getGlobalAddress(GA->getGlobal(), SDLoc(C2), VT, + GA->getOffset() + uint64_t(Offset)); +} + +bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) { + switch (Opcode) { + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: { + // If a divisor is zero/undef or any element of a divisor vector is + // zero/undef, the whole op is undef. + assert(Ops.size() == 2 && "Div/rem should have 2 operands"); + SDValue Divisor = Ops[1]; + if (Divisor.isUndef() || isNullConstant(Divisor)) + return true; + + return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) && + llvm::any_of(Divisor->op_values(), + [](SDValue V) { return V.isUndef() || + isNullConstant(V); }); + // TODO: Handle signed overflow. + } + // TODO: Handle oversized shifts. + default: + return false; + } +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, + EVT VT, SDNode *N1, SDNode *N2) { + // If the opcode is a target-specific ISD node, there's nothing we can + // do here and the operand rules may not line up with the below, so + // bail early. + if (Opcode >= ISD::BUILTIN_OP_END) + return SDValue(); + + if (isUndef(Opcode, {SDValue(N1, 0), SDValue(N2, 0)})) + return getUNDEF(VT); + + // Handle the case of two scalars. + if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) { + if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) { + SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, C1, C2); + assert((!Folded || !VT.isVector()) && + "Can't fold vectors ops with scalar operands"); + return Folded; + } + } + + // fold (add Sym, c) -> Sym+c + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1)) + return FoldSymbolOffset(Opcode, VT, GA, N2); + if (TLI->isCommutativeBinOp(Opcode)) + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2)) + return FoldSymbolOffset(Opcode, VT, GA, N1); + + // For vectors, extract each constant element and fold them individually. 
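The per-lane loop that follows behaves roughly like this sketch, which models an undef lane as std::nullopt and applies one simplistic fold-with-undef policy (the real getNode call applies its own undef folding rules):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

int main() {
  // Lane-by-lane folding of a <4 x i32> add.
  using Lane = std::optional<int32_t>;
  std::vector<Lane> A = {1, 2, std::nullopt, 4};
  std::vector<Lane> B = {10, 20, 30, 40};
  std::vector<Lane> Out;
  for (std::size_t I = 0; I != A.size(); ++I) {
    if (!A[I] || !B[I]) {
      Out.push_back(std::nullopt); // this sketch keeps undef lanes undef
      continue;
    }
    Out.push_back(*A[I] + *B[I]); // fold one scalar lane
  }
  assert(Out[0] == 11 && Out[1] == 22 && !Out[2] && Out[3] == 44);
  return 0;
}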
+ // Either input may be an undef value. + auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); + if (!BV1 && !N1->isUndef()) + return SDValue(); + auto *BV2 = dyn_cast<BuildVectorSDNode>(N2); + if (!BV2 && !N2->isUndef()) + return SDValue(); + // If both operands are undef, that's handled the same way as scalars. + if (!BV1 && !BV2) + return SDValue(); + + assert((!BV1 || !BV2 || BV1->getNumOperands() == BV2->getNumOperands()) && + "Vector binop with different number of elements in operands?"); + + EVT SVT = VT.getScalarType(); + EVT LegalSVT = SVT; + if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) { + LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); + if (LegalSVT.bitsLT(SVT)) + return SDValue(); + } + SmallVector<SDValue, 4> Outputs; + unsigned NumOps = BV1 ? BV1->getNumOperands() : BV2->getNumOperands(); + for (unsigned I = 0; I != NumOps; ++I) { + SDValue V1 = BV1 ? BV1->getOperand(I) : getUNDEF(SVT); + SDValue V2 = BV2 ? BV2->getOperand(I) : getUNDEF(SVT); + if (SVT.isInteger()) { + if (V1->getValueType(0).bitsGT(SVT)) + V1 = getNode(ISD::TRUNCATE, DL, SVT, V1); + if (V2->getValueType(0).bitsGT(SVT)) + V2 = getNode(ISD::TRUNCATE, DL, SVT, V2); + } + + if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) + return SDValue(); + + // Fold one vector element. + SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2); + if (LegalSVT != SVT) + ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); + + // Scalar folding only succeeded if the result is a constant or UNDEF. + if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant && + ScalarResult.getOpcode() != ISD::ConstantFP) + return SDValue(); + Outputs.push_back(ScalarResult); + } + + assert(VT.getVectorNumElements() == Outputs.size() && + "Vector size mismatch!"); + + // We may have a vector type but a scalar result. Create a splat. + Outputs.resize(VT.getVectorNumElements(), Outputs.back()); + + // Build a big vector out of the scalar elements we generated. + return getBuildVector(VT, SDLoc(), Outputs); +} + +// TODO: Merge with FoldConstantArithmetic +SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, + const SDLoc &DL, EVT VT, + ArrayRef<SDValue> Ops, + const SDNodeFlags Flags) { + // If the opcode is a target-specific ISD node, there's nothing we can + // do here and the operand rules may not line up with the below, so + // bail early. + if (Opcode >= ISD::BUILTIN_OP_END) + return SDValue(); + + if (isUndef(Opcode, Ops)) + return getUNDEF(VT); + + // We can only fold vectors - maybe merge with FoldConstantArithmetic someday? + if (!VT.isVector()) + return SDValue(); + + unsigned NumElts = VT.getVectorNumElements(); + + auto IsScalarOrSameVectorSize = [&](const SDValue &Op) { + return !Op.getValueType().isVector() || + Op.getValueType().getVectorNumElements() == NumElts; + }; + + auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) { + BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op); + return (Op.isUndef()) || (Op.getOpcode() == ISD::CONDCODE) || + (BV && BV->isConstant()); + }; + + // All operands must be vector types with the same number of elements as + // the result type and must be either UNDEF or a build vector of constant + // or UNDEF scalars. + if (!llvm::all_of(Ops, IsConstantBuildVectorOrUndef) || + !llvm::all_of(Ops, IsScalarOrSameVectorSize)) + return SDValue(); + + // If we are comparing vectors, then the result needs to be a i1 boolean + // that is then sign-extended back to the legal result type. + EVT SVT = (Opcode == ISD::SETCC ? 
MVT::i1 : VT.getScalarType()); + + // Find legal integer scalar type for constant promotion and + // ensure that its scalar size is at least as large as source. + EVT LegalSVT = VT.getScalarType(); + if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) { + LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); + if (LegalSVT.bitsLT(VT.getScalarType())) + return SDValue(); + } + + // Constant fold each scalar lane separately. + SmallVector<SDValue, 4> ScalarResults; + for (unsigned i = 0; i != NumElts; i++) { + SmallVector<SDValue, 4> ScalarOps; + for (SDValue Op : Ops) { + EVT InSVT = Op.getValueType().getScalarType(); + BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op); + if (!InBV) { + // We've checked that this is UNDEF or a constant of some kind. + if (Op.isUndef()) + ScalarOps.push_back(getUNDEF(InSVT)); + else + ScalarOps.push_back(Op); + continue; + } + + SDValue ScalarOp = InBV->getOperand(i); + EVT ScalarVT = ScalarOp.getValueType(); + + // Build vector (integer) scalar operands may need implicit + // truncation - do this before constant folding. + if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) + ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp); + + ScalarOps.push_back(ScalarOp); + } + + // Constant fold the scalar operands. + SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags); + + // Legalize the (integer) scalar constant if necessary. + if (LegalSVT != SVT) + ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); + + // Scalar folding only succeeded if the result is a constant or UNDEF. + if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant && + ScalarResult.getOpcode() != ISD::ConstantFP) + return SDValue(); + ScalarResults.push_back(ScalarResult); + } + + SDValue V = getBuildVector(VT, DL, ScalarResults); + NewSDValueDbgMsg(V, "New node fold constant vector: ", this); + return V; +} + +SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, + EVT VT, SDValue N1, SDValue N2) { + // TODO: We don't do any constant folding for strict FP opcodes here, but we + // should. That will require dealing with a potentially non-default + // rounding mode, checking the "opStatus" return value from the APFloat + // math calculations, and possibly other variations. + auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode()); + auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode()); + if (N1CFP && N2CFP) { + APFloat C1 = N1CFP->getValueAPF(), C2 = N2CFP->getValueAPF(); + switch (Opcode) { + case ISD::FADD: + C1.add(C2, APFloat::rmNearestTiesToEven); + return getConstantFP(C1, DL, VT); + case ISD::FSUB: + C1.subtract(C2, APFloat::rmNearestTiesToEven); + return getConstantFP(C1, DL, VT); + case ISD::FMUL: + C1.multiply(C2, APFloat::rmNearestTiesToEven); + return getConstantFP(C1, DL, VT); + case ISD::FDIV: + C1.divide(C2, APFloat::rmNearestTiesToEven); + return getConstantFP(C1, DL, VT); + case ISD::FREM: + C1.mod(C2); + return getConstantFP(C1, DL, VT); + case ISD::FCOPYSIGN: + C1.copySign(C2); + return getConstantFP(C1, DL, VT); + default: break; + } + } + if (N1CFP && Opcode == ISD::FP_ROUND) { + APFloat C1 = N1CFP->getValueAPF(); // make copy + bool Unused; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. 
+ (void) C1.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, + &Unused); + return getConstantFP(C1, DL, VT); + } + + switch (Opcode) { + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + // If both operands are undef, the result is undef. If 1 operand is undef, + // the result is NaN. This should match the behavior of the IR optimizer. + if (N1.isUndef() && N2.isUndef()) + return getUNDEF(VT); + if (N1.isUndef() || N2.isUndef()) + return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT); + } + return SDValue(); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + SDValue N1, SDValue N2, const SDNodeFlags Flags) { + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); + + // Canonicalize constant to RHS if commutative. + if (TLI->isCommutativeBinOp(Opcode)) { + if (N1C && !N2C) { + std::swap(N1C, N2C); + std::swap(N1, N2); + } else if (N1CFP && !N2CFP) { + std::swap(N1CFP, N2CFP); + std::swap(N1, N2); + } + } + + switch (Opcode) { + default: break; + case ISD::TokenFactor: + assert(VT == MVT::Other && N1.getValueType() == MVT::Other && + N2.getValueType() == MVT::Other && "Invalid token factor!"); + // Fold trivial token factors. + if (N1.getOpcode() == ISD::EntryToken) return N2; + if (N2.getOpcode() == ISD::EntryToken) return N1; + if (N1 == N2) return N1; + break; + case ISD::BUILD_VECTOR: { + // Attempt to simplify BUILD_VECTOR. + SDValue Ops[] = {N1, N2}; + if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) + return V; + break; + } + case ISD::CONCAT_VECTORS: { + SDValue Ops[] = {N1, N2}; + if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this)) + return V; + break; + } + case ISD::AND: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's + // worth handling here. + if (N2C && N2C->isNullValue()) + return N2; + if (N2C && N2C->isAllOnesValue()) // X & -1 -> X + return N1; + break; + case ISD::OR: + case ISD::XOR: + case ISD::ADD: + case ISD::SUB: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so + // it's worth handling here. 
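+    // E.g. (or x, 0), (xor x, 0), (add x, 0) and (sub x, 0) all fold to x.
+    // Only a zero RHS is handled here; (sub 0, x) is not simplified here.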
+    if (N2C && N2C->isNullValue())
+      return N1;
+    break;
+  case ISD::UDIV:
+  case ISD::UREM:
+  case ISD::MULHU:
+  case ISD::MULHS:
+  case ISD::MUL:
+  case ISD::SDIV:
+  case ISD::SREM:
+  case ISD::SMIN:
+  case ISD::SMAX:
+  case ISD::UMIN:
+  case ISD::UMAX:
+  case ISD::SADDSAT:
+  case ISD::SSUBSAT:
+  case ISD::UADDSAT:
+  case ISD::USUBSAT:
+    assert(VT.isInteger() && "This operator does not apply to FP types!");
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    break;
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM:
+    assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    if (SDValue V = simplifyFPBinop(Opcode, N1, N2))
+      return V;
+    break;
+  case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
+    assert(N1.getValueType() == VT &&
+           N1.getValueType().isFloatingPoint() &&
+           N2.getValueType().isFloatingPoint() &&
+           "Invalid FCOPYSIGN!");
+    break;
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+    if (SDValue V = simplifyShift(N1, N2))
+      return V;
+    LLVM_FALLTHROUGH;
+  case ISD::ROTL:
+  case ISD::ROTR:
+    assert(VT == N1.getValueType() &&
+           "Shift operators' return type must be the same as their first arg");
+    assert(VT.isInteger() && N2.getValueType().isInteger() &&
+           "Shifts only work on integers");
+    assert((!VT.isVector() || VT == N2.getValueType()) &&
+           "Vector shift amounts must have the same type as their first arg");
+    // Verify that the shift amount VT is big enough to hold valid shift
+    // amounts. This catches things like trying to shift an i1024 value by an
+    // i8, which is easy to fall into in generic code that uses
+    // TLI.getShiftAmountTy().
+    assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) &&
+           "Invalid use of small shift amount with oversized value!");
+
+    // Always fold shifts of i1 values so the code generator doesn't need to
+    // handle them. Since we know the size of the shift has to be less than the
+    // size of the value, the shift/rotate count is guaranteed to be zero.
+    if (VT == MVT::i1)
+      return N1;
+    if (N2C && N2C->isNullValue())
+      return N1;
+    break;
+  case ISD::FP_ROUND:
+    assert(VT.isFloatingPoint() &&
+           N1.getValueType().isFloatingPoint() &&
+           VT.bitsLE(N1.getValueType()) &&
+           N2C && (N2C->getZExtValue() == 0 || N2C->getZExtValue() == 1) &&
+           "Invalid FP_ROUND!");
+    if (N1.getValueType() == VT) return N1; // noop conversion.
+    break;
+  case ISD::AssertSext:
+  case ISD::AssertZext: {
+    EVT EVT = cast<VTSDNode>(N2)->getVT();
+    assert(VT == N1.getValueType() && "Not an inreg extend!");
+    assert(VT.isInteger() && EVT.isInteger() &&
+           "Cannot *_EXTEND_INREG FP types");
+    assert(!EVT.isVector() &&
+           "AssertSExt/AssertZExt type should be the vector element type "
+           "rather than the vector type!");
+    assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!");
+    if (VT.getScalarType() == EVT) return N1; // noop assertion.
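+    // E.g. (AssertZext i32 x, ValueType:i8) asserts that all bits of x above
+    // bit 7 are zero; once the asserted type equals the value type the
+    // assertion carries no information, hence the noop fold above.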
+ break; + } + case ISD::SIGN_EXTEND_INREG: { + EVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg extend!"); + assert(VT.isInteger() && EVT.isInteger() && + "Cannot *_EXTEND_INREG FP types"); + assert(EVT.isVector() == VT.isVector() && + "SIGN_EXTEND_INREG type should be vector iff the operand " + "type is vector!"); + assert((!EVT.isVector() || + EVT.getVectorNumElements() == VT.getVectorNumElements()) && + "Vector element counts must match in SIGN_EXTEND_INREG"); + assert(EVT.bitsLE(VT) && "Not extending!"); + if (EVT == VT) return N1; // Not actually extending + + auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) { + unsigned FromBits = EVT.getScalarSizeInBits(); + Val <<= Val.getBitWidth() - FromBits; + Val.ashrInPlace(Val.getBitWidth() - FromBits); + return getConstant(Val, DL, ConstantVT); + }; + + if (N1C) { + const APInt &Val = N1C->getAPIntValue(); + return SignExtendInReg(Val, VT); + } + if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) { + SmallVector<SDValue, 8> Ops; + llvm::EVT OpVT = N1.getOperand(0).getValueType(); + for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { + SDValue Op = N1.getOperand(i); + if (Op.isUndef()) { + Ops.push_back(getUNDEF(OpVT)); + continue; + } + ConstantSDNode *C = cast<ConstantSDNode>(Op); + APInt Val = C->getAPIntValue(); + Ops.push_back(SignExtendInReg(Val, OpVT)); + } + return getBuildVector(VT, DL, Ops); + } + break; + } + case ISD::EXTRACT_VECTOR_ELT: + assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() && + "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \ + element type of the vector."); + + // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF. + if (N1.isUndef()) + return getUNDEF(VT); + + // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF + if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements())) + return getUNDEF(VT); + + // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is + // expanding copies of large vectors from registers. + if (N2C && + N1.getOpcode() == ISD::CONCAT_VECTORS && + N1.getNumOperands() > 0) { + unsigned Factor = + N1.getOperand(0).getValueType().getVectorNumElements(); + return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, + N1.getOperand(N2C->getZExtValue() / Factor), + getConstant(N2C->getZExtValue() % Factor, DL, + N2.getValueType())); + } + + // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is + // expanding large vector constants. + if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { + SDValue Elt = N1.getOperand(N2C->getZExtValue()); + + if (VT != Elt.getValueType()) + // If the vector element type is not legal, the BUILD_VECTOR operands + // are promoted and implicitly truncated, and the result implicitly + // extended. Make that explicit here. + Elt = getAnyExtOrTrunc(Elt, DL, VT); + + return Elt; + } + + // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector + // operations are lowered to scalars. + if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) { + // If the indices are the same, return the inserted element else + // if the indices are known different, extract the element from + // the original vector. 
+      SDValue N1Op2 = N1.getOperand(2);
+      ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2);
+
+      if (N1Op2C && N2C) {
+        if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
+          if (VT == N1.getOperand(1).getValueType())
+            return N1.getOperand(1);
+          else
+            return getSExtOrTrunc(N1.getOperand(1), DL, VT);
+        }
+
+        return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+      }
+    }
+
+    // EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed
+    // when vector types are scalarized and v1iX is legal.
+    // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx)
+    if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+        N1.getValueType().getVectorNumElements() == 1) {
+      return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0),
+                     N1.getOperand(1));
+    }
+    break;
+  case ISD::EXTRACT_ELEMENT:
+    assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
+    assert(!N1.getValueType().isVector() && !VT.isVector() &&
+           (N1.getValueType().isInteger() == VT.isInteger()) &&
+           N1.getValueType() != VT &&
+           "Wrong types for EXTRACT_ELEMENT!");
+
+    // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
+    // 64-bit integers into 32-bit parts. Instead of building the extract of
+    // the BUILD_PAIR, only to have legalize rip it apart, just do it now.
+    if (N1.getOpcode() == ISD::BUILD_PAIR)
+      return N1.getOperand(N2C->getZExtValue());
+
+    // EXTRACT_ELEMENT of a constant int is also very common.
+    if (N1C) {
+      unsigned ElementSize = VT.getSizeInBits();
+      unsigned Shift = ElementSize * N2C->getZExtValue();
+      APInt ShiftedVal = N1C->getAPIntValue().lshr(Shift);
+      return getConstant(ShiftedVal.trunc(ElementSize), DL, VT);
+    }
+    break;
+  case ISD::EXTRACT_SUBVECTOR:
+    if (VT.isSimple() && N1.getValueType().isSimple()) {
+      assert(VT.isVector() && N1.getValueType().isVector() &&
+             "Extract subvector VTs must be vectors!");
+      assert(VT.getVectorElementType() ==
+             N1.getValueType().getVectorElementType() &&
+             "Extract subvector VTs must have the same element type!");
+      assert(VT.getSimpleVT() <= N1.getSimpleValueType() &&
+             "Extract subvector must be from larger vector to smaller vector!");
+
+      if (N2C) {
+        assert((VT.getVectorNumElements() + N2C->getZExtValue()
+                <= N1.getValueType().getVectorNumElements())
+               && "Extract subvector overflow!");
+      }
+
+      // Trivial extraction.
+      if (VT.getSimpleVT() == N1.getSimpleValueType())
+        return N1;
+
+      // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF.
+      if (N1.isUndef())
+        return getUNDEF(VT);
+
+      // EXTRACT_SUBVECTOR of CONCAT_VECTORS can be simplified if the pieces of
+      // the concat have the same type as the extract.
+      if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
+          N1.getNumOperands() > 0 &&
+          VT == N1.getOperand(0).getValueType()) {
+        unsigned Factor = VT.getVectorNumElements();
+        return N1.getOperand(N2C->getZExtValue() / Factor);
+      }
+
+      // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
+      // during shuffle legalization.
+      if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
+          VT == N1.getOperand(1).getValueType())
+        return N1.getOperand(1);
+    }
+    break;
+  }
+
+  // Perform trivial constant folding.
+  if (SDValue SV =
+          FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))
+    return SV;
+
+  if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2))
+    return V;
+
+  // Canonicalize an UNDEF to the RHS, even over a constant.
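+  // E.g. (add undef, 1) becomes (add 1, undef) for commutative opcodes, so
+  // the undef folds below only need to consider one operand order.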
+ if (N1.isUndef()) { + if (TLI->isCommutativeBinOp(Opcode)) { + std::swap(N1, N2); + } else { + switch (Opcode) { + case ISD::SIGN_EXTEND_INREG: + case ISD::SUB: + return getUNDEF(VT); // fold op(undef, arg2) -> undef + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + case ISD::SSUBSAT: + case ISD::USUBSAT: + return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0 + } + } + } + + // Fold a bunch of operators when the RHS is undef. + if (N2.isUndef()) { + switch (Opcode) { + case ISD::XOR: + if (N1.isUndef()) + // Handle undef ^ undef -> 0 special case. This is a common + // idiom (misuse). + return getConstant(0, DL, VT); + LLVM_FALLTHROUGH; + case ISD::ADD: + case ISD::SUB: + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + return getUNDEF(VT); // fold op(arg1, undef) -> undef + case ISD::MUL: + case ISD::AND: + case ISD::SSUBSAT: + case ISD::USUBSAT: + return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0 + case ISD::OR: + case ISD::SADDSAT: + case ISD::UADDSAT: + return getAllOnesConstant(DL, VT); + } + } + + // Memoize this node if possible. + SDNode *N; + SDVTList VTs = getVTList(VT); + SDValue Ops[] = {N1, N2}; + if (VT != MVT::Glue) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { + E->intersectFlagsWith(Flags); + return SDValue(E, 0); + } + + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + N->setFlags(Flags); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); + } else { + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); + } + + InsertNode(N); + SDValue V = SDValue(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3, + const SDNodeFlags Flags) { + // Perform various simplifications. + switch (Opcode) { + case ISD::FMA: { + assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); + assert(N1.getValueType() == VT && N2.getValueType() == VT && + N3.getValueType() == VT && "FMA types must match!"); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); + ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3); + if (N1CFP && N2CFP && N3CFP) { + APFloat V1 = N1CFP->getValueAPF(); + const APFloat &V2 = N2CFP->getValueAPF(); + const APFloat &V3 = N3CFP->getValueAPF(); + V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); + return getConstantFP(V1, DL, VT); + } + break; + } + case ISD::BUILD_VECTOR: { + // Attempt to simplify BUILD_VECTOR. + SDValue Ops[] = {N1, N2, N3}; + if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) + return V; + break; + } + case ISD::CONCAT_VECTORS: { + SDValue Ops[] = {N1, N2, N3}; + if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this)) + return V; + break; + } + case ISD::SETCC: { + assert(VT.isInteger() && "SETCC result type must be an integer!"); + assert(N1.getValueType() == N2.getValueType() && + "SETCC operands must have the same type!"); + assert(VT.isVector() == N1.getValueType().isVector() && + "SETCC type should be vector iff the operand type is vector!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == N1.getValueType().getVectorNumElements()) && + "SETCC vector element counts must match!"); + // Use FoldSetCC to simplify SETCC's. 
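+    // E.g. an integer (setcc x, x, seteq) is folded to a true constant here,
+    // so no SETCC node needs to be created for it.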
+    if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
+      return V;
+    // Vector constant folding.
+    SDValue Ops[] = {N1, N2, N3};
+    if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) {
+      NewSDValueDbgMsg(V, "New node vector constant folding: ", this);
+      return V;
+    }
+    break;
+  }
+  case ISD::SELECT:
+  case ISD::VSELECT:
+    if (SDValue V = simplifySelect(N1, N2, N3))
+      return V;
+    break;
+  case ISD::VECTOR_SHUFFLE:
+    llvm_unreachable("should use getVectorShuffle constructor!");
+  case ISD::INSERT_VECTOR_ELT: {
+    ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3);
+    // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF
+    if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
+      return getUNDEF(VT);
+    break;
+  }
+  case ISD::INSERT_SUBVECTOR: {
+    // Inserting undef into undef is still undef.
+    if (N1.isUndef() && N2.isUndef())
+      return getUNDEF(VT);
+    SDValue Index = N3;
+    if (VT.isSimple() && N1.getValueType().isSimple()
+        && N2.getValueType().isSimple()) {
+      assert(VT.isVector() && N1.getValueType().isVector() &&
+             N2.getValueType().isVector() &&
+             "Insert subvector VTs must be vectors");
+      assert(VT == N1.getValueType() &&
+             "Dest and insert subvector source types must match!");
+      assert(N2.getSimpleValueType() <= N1.getSimpleValueType() &&
+             "Insert subvector must be from smaller vector to larger vector!");
+      if (isa<ConstantSDNode>(Index)) {
+        assert((N2.getValueType().getVectorNumElements() +
+                cast<ConstantSDNode>(Index)->getZExtValue()
+                <= VT.getVectorNumElements())
+               && "Insert subvector overflow!");
+      }
+
+      // Trivial insertion.
+      if (VT.getSimpleVT() == N2.getSimpleValueType())
+        return N2;
+
+      // If this is an insert of an extracted vector into an undef vector, we
+      // can just use the input to the extract.
+      if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+          N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
+        return N2.getOperand(0);
+    }
+    break;
+  }
+  case ISD::BITCAST:
+    // Fold bit_convert nodes from a type to themselves.
+    if (N1.getValueType() == VT)
+      return N1;
+    break;
+  }
+
+  // Memoize node if it doesn't produce a flag.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  SDValue Ops[] = {N1, N2, N3};
+  if (VT != MVT::Glue) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops);
+    void *IP = nullptr;
+    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+      E->intersectFlagsWith(Flags);
+      return SDValue(E, 0);
+    }
+
+    N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+    N->setFlags(Flags);
+    createOperands(N, Ops);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+    createOperands(N, Ops);
+  }
+
+  InsertNode(N);
+  SDValue V = SDValue(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+                              SDValue N1, SDValue N2, SDValue N3, SDValue N4) {
+  SDValue Ops[] = { N1, N2, N3, N4 };
+  return getNode(Opcode, DL, VT, Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+                              SDValue N1, SDValue N2, SDValue N3, SDValue N4,
+                              SDValue N5) {
+  SDValue Ops[] = { N1, N2, N3, N4, N5 };
+  return getNode(Opcode, DL, VT, Ops);
+}
+
+/// getStackArgumentTokenFactor - Compute a TokenFactor to force all
+/// the incoming stack arguments to be loaded from the stack.
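+/// For example, targets use this from their LowerCall hooks when lowering
+/// tail calls, so that stores of outgoing arguments cannot be scheduled
+/// before the loads of the incoming stack arguments they might overwrite.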
+SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { + SmallVector<SDValue, 8> ArgChains; + + // Include the original chain at the beginning of the list. When this is + // used by target LowerCall hooks, this helps legalize find the + // CALLSEQ_BEGIN node. + ArgChains.push_back(Chain); + + // Add a chain value for each stack argument. + for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(), + UE = getEntryNode().getNode()->use_end(); U != UE; ++U) + if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) + if (FI->getIndex() < 0) + ArgChains.push_back(SDValue(L, 1)); + + // Build a tokenfactor for all the chains. + return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); +} + +/// getMemsetValue - Vectorized representation of the memset value +/// operand. +static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, + const SDLoc &dl) { + assert(!Value.isUndef()); + + unsigned NumBits = VT.getScalarSizeInBits(); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { + assert(C->getAPIntValue().getBitWidth() == 8); + APInt Val = APInt::getSplat(NumBits, C->getAPIntValue()); + if (VT.isInteger()) { + bool IsOpaque = VT.getSizeInBits() > 64 || + !DAG.getTargetLoweringInfo().isLegalStoreImmediate(C->getSExtValue()); + return DAG.getConstant(Val, dl, VT, false, IsOpaque); + } + return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl, + VT); + } + + assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?"); + EVT IntVT = VT.getScalarType(); + if (!IntVT.isInteger()) + IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits()); + + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value); + if (NumBits > 8) { + // Use a multiplication with 0x010101... to extend the input to the + // required length. + APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); + Value = DAG.getNode(ISD::MUL, dl, IntVT, Value, + DAG.getConstant(Magic, dl, IntVT)); + } + + if (VT != Value.getValueType() && !VT.isInteger()) + Value = DAG.getBitcast(VT.getScalarType(), Value); + if (VT != Value.getValueType()) + Value = DAG.getSplatBuildVector(VT, dl, Value); + + return Value; +} + +/// getMemsetStringVal - Similar to getMemsetValue. Except this is only +/// used when a memcpy is turned into a memset when the source is a constant +/// string ptr. +static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, + const TargetLowering &TLI, + const ConstantDataArraySlice &Slice) { + // Handle vector with all elements zero. + if (Slice.Array == nullptr) { + if (VT.isInteger()) + return DAG.getConstant(0, dl, VT); + else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) + return DAG.getConstantFP(0.0, dl, VT); + else if (VT.isVector()) { + unsigned NumElts = VT.getVectorNumElements(); + MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? 
MVT::i32 : MVT::i64; + return DAG.getNode(ISD::BITCAST, dl, VT, + DAG.getConstant(0, dl, + EVT::getVectorVT(*DAG.getContext(), + EltVT, NumElts))); + } else + llvm_unreachable("Expected type!"); + } + + assert(!VT.isVector() && "Can't handle vector type here!"); + unsigned NumVTBits = VT.getSizeInBits(); + unsigned NumVTBytes = NumVTBits / 8; + unsigned NumBytes = std::min(NumVTBytes, unsigned(Slice.Length)); + + APInt Val(NumVTBits, 0); + if (DAG.getDataLayout().isLittleEndian()) { + for (unsigned i = 0; i != NumBytes; ++i) + Val |= (uint64_t)(unsigned char)Slice[i] << i*8; + } else { + for (unsigned i = 0; i != NumBytes; ++i) + Val |= (uint64_t)(unsigned char)Slice[i] << (NumVTBytes-i-1)*8; + } + + // If the "cost" of materializing the integer immediate is less than the cost + // of a load, then it is cost effective to turn the load into the immediate. + Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty)) + return DAG.getConstant(Val, dl, VT); + return SDValue(nullptr, 0); +} + +SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset, + const SDLoc &DL) { + EVT VT = Base.getValueType(); + return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT)); +} + +/// Returns true if memcpy source is constant data. +static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { + uint64_t SrcDelta = 0; + GlobalAddressSDNode *G = nullptr; + if (Src.getOpcode() == ISD::GlobalAddress) + G = cast<GlobalAddressSDNode>(Src); + else if (Src.getOpcode() == ISD::ADD && + Src.getOperand(0).getOpcode() == ISD::GlobalAddress && + Src.getOperand(1).getOpcode() == ISD::Constant) { + G = cast<GlobalAddressSDNode>(Src.getOperand(0)); + SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue(); + } + if (!G) + return false; + + return getConstantDataArrayInfo(G->getGlobal(), Slice, 8, + SrcDelta + G->getOffset()); +} + +static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { + // On Darwin, -Os means optimize for size without hurting performance, so + // only really optimize for size when -Oz (MinSize) is used. + if (MF.getTarget().getTargetTriple().isOSDarwin()) + return MF.getFunction().hasMinSize(); + return MF.getFunction().hasOptSize(); +} + +static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SmallVector<SDValue, 32> &OutChains, unsigned From, + unsigned To, SmallVector<SDValue, 16> &OutLoadChains, + SmallVector<SDValue, 16> &OutStoreChains) { + assert(OutLoadChains.size() && "Missing loads in memcpy inlining"); + assert(OutStoreChains.size() && "Missing stores in memcpy inlining"); + SmallVector<SDValue, 16> GluedLoadChains; + for (unsigned i = From; i < To; ++i) { + OutChains.push_back(OutLoadChains[i]); + GluedLoadChains.push_back(OutLoadChains[i]); + } + + // Chain for all loads. + SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + GluedLoadChains); + + for (unsigned i = From; i < To; ++i) { + StoreSDNode *ST = dyn_cast<StoreSDNode>(OutStoreChains[i]); + SDValue NewStore = DAG.getTruncStore(LoadToken, dl, ST->getValue(), + ST->getBasePtr(), ST->getMemoryVT(), + ST->getMemOperand()); + OutChains.push_back(NewStore); + } +} + +static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + uint64_t Size, unsigned Alignment, + bool isVol, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + // Turn a memcpy of undef to nop. 
+  // FIXME: We need to honor volatile even if Src is undef.
+  if (Src.isUndef())
+    return Chain;
+
+  // Expand memcpy to a series of load and store ops if the size operand falls
+  // below a certain threshold.
+  // TODO: In the AlwaysInline case, if the size is big then generate a loop
+  // rather than a potentially humongous number of loads and stores.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const DataLayout &DL = DAG.getDataLayout();
+  LLVMContext &C = *DAG.getContext();
+  std::vector<EVT> MemOps;
+  bool DstAlignCanChange = false;
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  bool OptSize = shouldLowerMemFuncForSize(MF);
+  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+  if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
+    DstAlignCanChange = true;
+  unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+  if (Alignment > SrcAlign)
+    SrcAlign = Alignment;
+  ConstantDataArraySlice Slice;
+  bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
+  bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
+  unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
+
+  if (!TLI.findOptimalMemOpLowering(
+          MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment),
+          (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false,
+          /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant,
+          /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(),
+          SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
+    return SDValue();
+
+  if (DstAlignCanChange) {
+    Type *Ty = MemOps[0].getTypeForEVT(C);
+    unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
+
+    // Don't promote to an alignment that would require dynamic stack
+    // realignment.
+    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+    if (!TRI->needsStackRealignment(MF))
+      while (NewAlign > Alignment &&
+             DL.exceedsNaturalStackAlignment(Align(NewAlign)))
+        NewAlign /= 2;
+
+    if (NewAlign > Alignment) {
+      // Give the stack frame object a larger alignment if needed.
+      if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+        MFI.setObjectAlignment(FI->getIndex(), NewAlign);
+      Alignment = NewAlign;
+    }
+  }
+
+  MachineMemOperand::Flags MMOFlags =
+      isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
+  SmallVector<SDValue, 16> OutLoadChains;
+  SmallVector<SDValue, 16> OutStoreChains;
+  SmallVector<SDValue, 32> OutChains;
+  unsigned NumMemOps = MemOps.size();
+  uint64_t SrcOff = 0, DstOff = 0;
+  for (unsigned i = 0; i != NumMemOps; ++i) {
+    EVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Value, Store;
+
+    if (VTSize > Size) {
+      // Issuing an unaligned load / store pair that overlaps with the previous
+      // pair. Adjust the offset accordingly.
+      assert(i == NumMemOps-1 && i != 0);
+      SrcOff -= VTSize - Size;
+      DstOff -= VTSize - Size;
+    }
+
+    if (CopyFromConstant &&
+        (isZeroConstant || (VT.isInteger() && !VT.isVector()))) {
+      // It's unlikely a store of a vector immediate can be done in a single
+      // instruction. It would require a load from a constantpool first.
+      // We only handle zero vectors here.
+      // FIXME: Handle other cases where store of vector immediate is done in
+      // a single instruction.
+      ConstantDataArraySlice SubSlice;
+      if (SrcOff < Slice.Length) {
+        SubSlice = Slice;
+        SubSlice.move(SrcOff);
+      } else {
+        // This is an out-of-bounds access and hence UB. Pretend we read zero.
+        SubSlice.Array = nullptr;
+        SubSlice.Offset = 0;
+        SubSlice.Length = VTSize;
+      }
+      Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
+      if (Value.getNode()) {
+        Store = DAG.getStore(
+            Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+            DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags);
+        OutChains.push_back(Store);
+      }
+    }
+
+    if (!Store.getNode()) {
+      // The type might not be legal for the target. This should only happen
+      // if the type is smaller than a legal type, as on PPC, so the right
+      // thing to do is generate a LoadExt/StoreTrunc pair. These simplify
+      // to Load/Store if NVT==VT.
+      // FIXME: does the case above also need this?
+      EVT NVT = TLI.getTypeToTransformTo(C, VT);
+      assert(NVT.bitsGE(VT));
+
+      bool isDereferenceable =
+        SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
+      MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
+      if (isDereferenceable)
+        SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+
+      Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
+                             DAG.getMemBasePlusOffset(Src, SrcOff, dl),
+                             SrcPtrInfo.getWithOffset(SrcOff), VT,
+                             MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
+      OutLoadChains.push_back(Value.getValue(1));
+
+      Store = DAG.getTruncStore(
+          Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+          DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags);
+      OutStoreChains.push_back(Store);
+    }
+    SrcOff += VTSize;
+    DstOff += VTSize;
+    Size -= VTSize;
+  }
+
+  unsigned GluedLdStLimit = MaxLdStGlue == 0 ?
+                            TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue;
+  unsigned NumLdStInMemcpy = OutStoreChains.size();
+
+  if (NumLdStInMemcpy) {
+    // A memcpy of constants may have been converted into a memset, in which
+    // case we have only stores and no loads. In the absence of loads, there
+    // is nothing to gang up.
+    if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) {
+      // If the target does not care, just leave it as is.
+      for (unsigned i = 0; i < NumLdStInMemcpy; ++i) {
+        OutChains.push_back(OutLoadChains[i]);
+        OutChains.push_back(OutStoreChains[i]);
+      }
+    } else {
+      // The ld/st count is less than or equal to the limit set by the target.
+      if (NumLdStInMemcpy <= GluedLdStLimit) {
+        chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
+                                     NumLdStInMemcpy, OutLoadChains,
+                                     OutStoreChains);
+      } else {
+        unsigned NumberLdChain = NumLdStInMemcpy / GluedLdStLimit;
+        unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit;
+        unsigned GlueIter = 0;
+
+        for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) {
+          unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit;
+          unsigned IndexTo = NumLdStInMemcpy - GlueIter;
+
+          chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo,
+                                       OutLoadChains, OutStoreChains);
+          GlueIter += GluedLdStLimit;
+        }
+
+        // Residual ld/st.
+        if (RemainingLdStInMemcpy) {
+          chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
+                                       RemainingLdStInMemcpy, OutLoadChains,
+                                       OutStoreChains);
+        }
+      }
+    }
+  }
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+}
+
+static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
+                                        SDValue Chain, SDValue Dst, SDValue Src,
+                                        uint64_t Size, unsigned Align,
+                                        bool isVol, bool AlwaysInline,
+                                        MachinePointerInfo DstPtrInfo,
+                                        MachinePointerInfo SrcPtrInfo) {
+  // Turn a memmove of undef to nop.
+  // FIXME: We need to honor volatile even if Src is undef.
+  if (Src.isUndef())
+    return Chain;
+
+  // Expand memmove to a series of load and store ops if the size operand falls
+  // below a certain threshold.
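+  // Unlike memcpy, all of the loads below are emitted before any of the
+  // stores, so the expansion stays correct when the two regions overlap.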
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + LLVMContext &C = *DAG.getContext(); + std::vector<EVT> MemOps; + bool DstAlignCanChange = false; + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); + if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) + DstAlignCanChange = true; + unsigned SrcAlign = DAG.InferPtrAlignment(Src); + if (Align > SrcAlign) + SrcAlign = Align; + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); + // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in + // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the + // correct code. + bool AllowOverlap = false; + if (!TLI.findOptimalMemOpLowering( + MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign, + /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, + AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), + MF.getFunction().getAttributes())) + return SDValue(); + + if (DstAlignCanChange) { + Type *Ty = MemOps[0].getTypeForEVT(C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); + if (NewAlign > Align) { + // Give the stack frame object a larger alignment if needed. + if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) + MFI.setObjectAlignment(FI->getIndex(), NewAlign); + Align = NewAlign; + } + } + + MachineMemOperand::Flags MMOFlags = + isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; + uint64_t SrcOff = 0, DstOff = 0; + SmallVector<SDValue, 8> LoadValues; + SmallVector<SDValue, 8> LoadChains; + SmallVector<SDValue, 8> OutChains; + unsigned NumMemOps = MemOps.size(); + for (unsigned i = 0; i < NumMemOps; i++) { + EVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value; + + bool isDereferenceable = + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + MachineMemOperand::Flags SrcMMOFlags = MMOFlags; + if (isDereferenceable) + SrcMMOFlags |= MachineMemOperand::MODereferenceable; + + Value = + DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), + SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags); + LoadValues.push_back(Value); + LoadChains.push_back(Value.getValue(1)); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); + OutChains.clear(); + for (unsigned i = 0; i < NumMemOps; i++) { + EVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Store; + + Store = DAG.getStore(Chain, dl, LoadValues[i], + DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags); + OutChains.push_back(Store); + DstOff += VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); +} + +/// Lower the call to 'memset' intrinsic function into a series of store +/// operations. +/// +/// \param DAG Selection DAG where lowered code is placed. +/// \param dl Link to corresponding IR location. +/// \param Chain Control flow dependency. +/// \param Dst Pointer to destination memory location. +/// \param Src Value of byte to write into the memory. +/// \param Size Number of bytes to write. +/// \param Align Alignment of the destination in bytes. +/// \param isVol True if destination is volatile. +/// \param DstPtrInfo IR information on the memory pointer. 
+/// \returns The new head of the control flow if lowering succeeded, or an
+/// empty SDValue otherwise.
+///
+/// The function tries to replace the 'llvm.memset' intrinsic with several
+/// store operations and value calculation code. This is usually profitable
+/// for small memory sizes.
+static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
+                               SDValue Chain, SDValue Dst, SDValue Src,
+                               uint64_t Size, unsigned Align, bool isVol,
+                               MachinePointerInfo DstPtrInfo) {
+  // Turn a memset of undef to nop.
+  // FIXME: We need to honor volatile even if Src is undef.
+  if (Src.isUndef())
+    return Chain;
+
+  // Expand memset to a series of store ops if the size operand
+  // falls below a certain threshold.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  std::vector<EVT> MemOps;
+  bool DstAlignCanChange = false;
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  bool OptSize = shouldLowerMemFuncForSize(MF);
+  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+  if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
+    DstAlignCanChange = true;
+  bool IsZeroVal =
+      isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
+  if (!TLI.findOptimalMemOpLowering(
+          MemOps, TLI.getMaxStoresPerMemset(OptSize), Size,
+          (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true,
+          /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
+          /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u,
+          MF.getFunction().getAttributes()))
+    return SDValue();
+
+  if (DstAlignCanChange) {
+    Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+    unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
+    if (NewAlign > Align) {
+      // Give the stack frame object a larger alignment if needed.
+      if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+        MFI.setObjectAlignment(FI->getIndex(), NewAlign);
+      Align = NewAlign;
+    }
+  }
+
+  SmallVector<SDValue, 8> OutChains;
+  uint64_t DstOff = 0;
+  unsigned NumMemOps = MemOps.size();
+
+  // Find the largest store and generate the bit pattern for it.
+  EVT LargestVT = MemOps[0];
+  for (unsigned i = 1; i < NumMemOps; i++)
+    if (MemOps[i].bitsGT(LargestVT))
+      LargestVT = MemOps[i];
+  SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
+  for (unsigned i = 0; i < NumMemOps; i++) {
+    EVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    if (VTSize > Size) {
+      // Issuing an unaligned store that overlaps with the previous store.
+      // Adjust the offset accordingly.
+      assert(i == NumMemOps-1 && i != 0);
+      DstOff -= VTSize - Size;
+    }
+
+    // If this store is smaller than the largest store, see whether we can get
+    // the smaller value for free with a truncate.
+    SDValue Value = MemSetValue;
+    if (VT.bitsLT(LargestVT)) {
+      if (!LargestVT.isVector() && !VT.isVector() &&
+          TLI.isTruncateFree(LargestVT, VT))
+        Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+      else
+        Value = getMemsetValue(Src, VT, DAG, dl);
+    }
+    assert(Value.getValueType() == VT && "Value with wrong type.");
+    SDValue Store = DAG.getStore(
+        Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+        DstPtrInfo.getWithOffset(DstOff), Align,
+        isVol ?
MachineMemOperand::MOVolatile : MachineMemOperand::MONone); + OutChains.push_back(Store); + DstOff += VT.getSizeInBits() / 8; + Size -= VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); +} + +static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, + unsigned AS) { + // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all + // pointer operands can be losslessly bitcasted to pointers of address space 0 + if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) { + report_fatal_error("cannot lower memory intrinsic in address space " + + Twine(AS)); + } +} + +SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, + SDValue Src, SDValue Size, unsigned Align, + bool isVol, bool AlwaysInline, bool isTailCall, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); + + // Check to see if we should lower the memcpy to loads and stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memcpy with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(),Align, + isVol, false, DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memcpy with target-specific + // code. If the target chooses to do this, this is the next best. + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemcpy( + *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, + DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } + + // If we really need inline code and the target declined to provide it, + // use a (potentially long) sequence of loads and stores. + if (AlwaysInline) { + assert(ConstantSize && "AlwaysInline requires a constant size!"); + return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Align, isVol, + true, DstPtrInfo, SrcPtrInfo); + } + + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); + + // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc + // memcpy is not guaranteed to be safe. libc memcpys aren't required to + // respect volatile, so they may do things like read or write memory + // beyond the given memory regions. But fixing this isn't easy, and most + // people don't care. + + // Emit a library call. 
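+  // The libcall's result is deliberately discarded; only the returned chain
+  // (CallResult.second) is threaded back into the DAG.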
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = Type::getInt8PtrTy(*getContext()); + Entry.Node = Dst; Args.push_back(Entry); + Entry.Node = Src; Args.push_back(Entry); + + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Node = Size; Args.push_back(Entry); + // FIXME: pass in SDLoc + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), + Dst.getValueType().getTypeForEVT(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + +SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, + SDValue Dst, unsigned DstAlign, + SDValue Src, unsigned SrcAlign, + SDValue Size, Type *SizeTy, + unsigned ElemSz, bool isTailCall, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = SizeTy; + Entry.Node = Size; + Args.push_back(Entry); + + RTLIB::Libcall LibraryCall = + RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(LibraryCall), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + +SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, + SDValue Src, SDValue Size, unsigned Align, + bool isVol, bool isTailCall, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); + + // Check to see if we should lower the memmove to loads and stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memmove with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Align, isVol, + false, DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memmove with target-specific + // code. If the target chooses to do this, this is the next best. + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemmove( + *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } + + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); + + // FIXME: If the memmove is volatile, lowering it to plain libc memmove may + // not be safe. See memcpy above for more details. + + // Emit a library call. 
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = Type::getInt8PtrTy(*getContext()); + Entry.Node = Dst; Args.push_back(Entry); + Entry.Node = Src; Args.push_back(Entry); + + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Node = Size; Args.push_back(Entry); + // FIXME: pass in SDLoc + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), + Dst.getValueType().getTypeForEVT(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + +SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, + SDValue Dst, unsigned DstAlign, + SDValue Src, unsigned SrcAlign, + SDValue Size, Type *SizeTy, + unsigned ElemSz, bool isTailCall, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = SizeTy; + Entry.Node = Size; + Args.push_back(Entry); + + RTLIB::Libcall LibraryCall = + RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(LibraryCall), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + +SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, + SDValue Src, SDValue Size, unsigned Align, + bool isVol, bool isTailCall, + MachinePointerInfo DstPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); + + // Check to see if we should lower the memset to stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memset with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), + Align, isVol, DstPtrInfo); + + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memset with target-specific + // code. If the target chooses to do this, this is the next best. + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemset( + *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo); + if (Result.getNode()) + return Result; + } + + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + + // Emit a library call. 
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Dst; Entry.Ty = Type::getInt8PtrTy(*getContext()); + Args.push_back(Entry); + Entry.Node = Src; + Entry.Ty = Src.getValueType().getTypeForEVT(*getContext()); + Args.push_back(Entry); + Entry.Node = Size; + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Args.push_back(Entry); + + // FIXME: pass in SDLoc + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), + Dst.getValueType().getTypeForEVT(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + +SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl, + SDValue Dst, unsigned DstAlign, + SDValue Value, SDValue Size, Type *SizeTy, + unsigned ElemSz, bool isTailCall, + MachinePointerInfo DstPtrInfo) { + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Ty = Type::getInt8Ty(*getContext()); + Entry.Node = Value; + Args.push_back(Entry); + + Entry.Ty = SizeTy; + Entry.Node = Size; + Args.push_back(Entry); + + RTLIB::Libcall LibraryCall = + RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(LibraryCall), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, + SDVTList VTList, ArrayRef<SDValue> Ops, + MachineMemOperand *MMO) { + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + AddNodeIDNode(ID, Opcode, VTList, Ops); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void* IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + auto *N = newSDNode<AtomicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), + VTList, MemVT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, + EVT MemVT, SDVTList VTs, SDValue Chain, + SDValue Ptr, SDValue Cmp, SDValue Swp, + MachineMemOperand *MMO) { + assert(Opcode == ISD::ATOMIC_CMP_SWAP || + Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); + assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); + + SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, + SDValue Chain, SDValue Ptr, SDValue Val, + MachineMemOperand *MMO) { + assert((Opcode == ISD::ATOMIC_LOAD_ADD || + Opcode == ISD::ATOMIC_LOAD_SUB || + Opcode == ISD::ATOMIC_LOAD_AND || + Opcode == ISD::ATOMIC_LOAD_CLR || + Opcode == ISD::ATOMIC_LOAD_OR || + Opcode == 
ISD::ATOMIC_LOAD_XOR || + Opcode == ISD::ATOMIC_LOAD_NAND || + Opcode == ISD::ATOMIC_LOAD_MIN || + Opcode == ISD::ATOMIC_LOAD_MAX || + Opcode == ISD::ATOMIC_LOAD_UMIN || + Opcode == ISD::ATOMIC_LOAD_UMAX || + Opcode == ISD::ATOMIC_LOAD_FADD || + Opcode == ISD::ATOMIC_LOAD_FSUB || + Opcode == ISD::ATOMIC_SWAP || + Opcode == ISD::ATOMIC_STORE) && + "Invalid Atomic Op"); + + EVT VT = Val.getValueType(); + + SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) : + getVTList(VT, MVT::Other); + SDValue Ops[] = {Chain, Ptr, Val}; + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, + EVT VT, SDValue Chain, SDValue Ptr, + MachineMemOperand *MMO) { + assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op"); + + SDVTList VTs = getVTList(VT, MVT::Other); + SDValue Ops[] = {Chain, Ptr}; + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); +} + +/// getMergeValues - Create a MERGE_VALUES node from the given operands. +SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { + if (Ops.size() == 1) + return Ops[0]; + + SmallVector<EVT, 4> VTs; + VTs.reserve(Ops.size()); + for (unsigned i = 0; i < Ops.size(); ++i) + VTs.push_back(Ops[i].getValueType()); + return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops); +} + +SDValue SelectionDAG::getMemIntrinsicNode( + unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, + EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, + MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) { + if (Align == 0) // Ensure that codegen never sees alignment 0 + Align = getEVTAlignment(MemVT); + + if (!Size) + Size = MemVT.getStoreSize(); + + MachineFunction &MF = getMachineFunction(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, Flags, Size, Align, AAInfo); + + return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); +} + +SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, + SDVTList VTList, + ArrayRef<SDValue> Ops, EVT MemVT, + MachineMemOperand *MMO) { + assert((Opcode == ISD::INTRINSIC_VOID || + Opcode == ISD::INTRINSIC_W_CHAIN || + Opcode == ISD::PREFETCH || + Opcode == ISD::LIFETIME_START || + Opcode == ISD::LIFETIME_END || + ((int)Opcode <= std::numeric_limits<int>::max() && + (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && + "Opcode is not a memory-accessing opcode!"); + + // Memoize the node unless it returns a flag. + MemIntrinsicSDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops); + ID.AddInteger(getSyntheticNodeSubclassData<MemIntrinsicSDNode>( + Opcode, dl.getIROrder(), VTList, MemVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), + VTList, MemVT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + } else { + N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), + VTList, MemVT, MMO); + createOperands(N, Ops); + } + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, + SDValue Chain, int FrameIndex, + int64_t Size, int64_t Offset) { + const unsigned Opcode = IsStart ? 
ISD::LIFETIME_START : ISD::LIFETIME_END; + const auto VTs = getVTList(MVT::Other); + SDValue Ops[2] = { + Chain, + getFrameIndex(FrameIndex, + getTargetLoweringInfo().getFrameIndexTy(getDataLayout()), + true)}; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops); + ID.AddInteger(FrameIndex); + ID.AddInteger(Size); + ID.AddInteger(Offset); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) + return SDValue(E, 0); + + LifetimeSDNode *N = newSDNode<LifetimeSDNode>( + Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, Size, Offset); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a +/// MachinePointerInfo record from it. This is particularly useful because the +/// code generator has many cases where it doesn't bother passing in a +/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". +static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info, + SelectionDAG &DAG, SDValue Ptr, + int64_t Offset = 0) { + // If this is FI+Offset, we can model it. + if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) + return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), + FI->getIndex(), Offset); + + // If this is (FI+Offset1)+Offset2, we can model it. + if (Ptr.getOpcode() != ISD::ADD || + !isa<ConstantSDNode>(Ptr.getOperand(1)) || + !isa<FrameIndexSDNode>(Ptr.getOperand(0))) + return Info; + + int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); + return MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI, + Offset + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue()); +} + +/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a +/// MachinePointerInfo record from it. This is particularly useful because the +/// code generator has many cases where it doesn't bother passing in a +/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". +static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info, + SelectionDAG &DAG, SDValue Ptr, + SDValue OffsetOp) { + // If the 'Offset' value isn't a constant, we can't handle this. + if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp)) + return InferPointerInfo(Info, DAG, Ptr, OffsetNode->getSExtValue()); + if (OffsetOp.isUndef()) + return InferPointerInfo(Info, DAG, Ptr); + return Info; +} + +SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, SDValue Offset, + MachinePointerInfo PtrInfo, EVT MemVT, + unsigned Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, const MDNode *Ranges) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(MemVT); + + MMOFlags |= MachineMemOperand::MOLoad; + assert((MMOFlags & MachineMemOperand::MOStore) == 0); + // If we don't have a PtrInfo, infer the trivial frame index case to simplify + // clients. 
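+  // E.g. a load from (add FrameIndex<1>, 8) with a null PtrInfo is given a
+  // fixed-stack MachinePointerInfo for frame object #1 at offset 8.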
+ if (PtrInfo.V.isNull()) + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); + + MachineFunction &MF = getMachineFunction(); + MachineMemOperand *MMO = MF.getMachineMemOperand( + PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges); + return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); +} + +SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, SDValue Offset, EVT MemVT, + MachineMemOperand *MMO) { + if (VT == MemVT) { + ExtType = ISD::NON_EXTLOAD; + } else if (ExtType == ISD::NON_EXTLOAD) { + assert(VT == MemVT && "Non-extending load from different memory type!"); + } else { + // Extending load. + assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) && + "Should only be an extending load, not truncating!"); + assert(VT.isInteger() == MemVT.isInteger() && + "Cannot convert from FP to Int or Int -> FP!"); + assert(VT.isVector() == MemVT.isVector() && + "Cannot use an ext load to convert to or from a vector!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == MemVT.getVectorNumElements()) && + "Cannot use an ext load to change the number of vector elements!"); + } + + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); + + SDVTList VTs = Indexed ? + getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other); + SDValue Ops[] = { Chain, Ptr, Offset }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::LOAD, VTs, Ops); + ID.AddInteger(MemVT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<LoadSDNode>( + dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<LoadSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode<LoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, + ExtType, MemVT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, MachinePointerInfo PtrInfo, + unsigned Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, const MDNode *Ranges) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, + PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges); +} + +SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, MachineMemOperand *MMO) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, + VT, MMO); +} + +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, + EVT VT, SDValue Chain, SDValue Ptr, + MachinePointerInfo PtrInfo, EVT MemVT, + unsigned Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, PtrInfo, + MemVT, Alignment, MMOFlags, AAInfo); +} + +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, + EVT VT, SDValue Chain, SDValue Ptr, EVT MemVT, + MachineMemOperand *MMO) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, + MemVT, MMO); +} + +SDValue 
SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl,
+                             SDValue Base, SDValue Offset,
+                             ISD::MemIndexedMode AM) {
+  LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+  assert(LD->getOffset().isUndef() && "Load is already an indexed load!");
+  // Don't propagate the invariant or dereferenceable flags.
+  auto MMOFlags =
+      LD->getMemOperand()->getFlags() &
+      ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
+  return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
+                 LD->getChain(), Base, Offset, LD->getPointerInfo(),
+                 LD->getMemoryVT(), LD->getAlignment(), MMOFlags,
+                 LD->getAAInfo());
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
+                               SDValue Ptr, MachinePointerInfo PtrInfo,
+                               unsigned Alignment,
+                               MachineMemOperand::Flags MMOFlags,
+                               const AAMDNodes &AAInfo) {
+  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+  if (Alignment == 0) // Ensure that codegen never sees alignment 0
+    Alignment = getEVTAlignment(Val.getValueType());
+
+  MMOFlags |= MachineMemOperand::MOStore;
+  assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+  if (PtrInfo.V.isNull())
+    PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+  MachineFunction &MF = getMachineFunction();
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo);
+  return getStore(Chain, dl, Val, Ptr, MMO);
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
+                               SDValue Ptr, MachineMemOperand *MMO) {
+  assert(Chain.getValueType() == MVT::Other &&
+         "Invalid chain type");
+  EVT VT = Val.getValueType();
+  SDVTList VTs = getVTList(MVT::Other);
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  SDValue Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
+  ID.AddInteger(VT.getRawBits());
+  ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
+      dl.getIROrder(), VTs, ISD::UNINDEXED, false, VT, MMO));
+  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+    cast<StoreSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+                                   ISD::UNINDEXED, false, VT, MMO);
+  createOperands(N, Ops);
+
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
+                                    SDValue Ptr, MachinePointerInfo PtrInfo,
+                                    EVT SVT, unsigned Alignment,
+                                    MachineMemOperand::Flags MMOFlags,
+                                    const AAMDNodes &AAInfo) {
+  assert(Chain.getValueType() == MVT::Other &&
+         "Invalid chain type");
+  if (Alignment == 0) // Ensure that codegen never sees alignment 0
+    Alignment = getEVTAlignment(SVT);
+
+  MMOFlags |= MachineMemOperand::MOStore;
+  assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+  if (PtrInfo.V.isNull())
+    PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+  MachineFunction &MF = getMachineFunction();
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo);
+  return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
+                                    SDValue Ptr, EVT SVT,
+                                    MachineMemOperand *MMO) {
+  EVT VT = Val.getValueType();
+
+  assert(Chain.getValueType() == MVT::Other &&
+         "Invalid chain type");
+  if (VT == SVT)
+    return getStore(Chain, dl, Val, Ptr, MMO);
+
+  assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+         "Should only be a truncating store, not extending!");
+  assert(VT.isInteger() == SVT.isInteger() &&
+         "Can't do FP-INT conversion!");
+  assert(VT.isVector() == SVT.isVector() &&
+         "Cannot use trunc store to convert to or from a vector!");
+  assert((!VT.isVector() ||
+          VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
+         "Cannot use trunc store to change the number of vector elements!");
+
+  SDVTList VTs = getVTList(MVT::Other);
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  SDValue Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
+  ID.AddInteger(SVT.getRawBits());
+  ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
+      dl.getIROrder(), VTs, ISD::UNINDEXED, true, SVT, MMO));
+  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+    cast<StoreSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+                                   ISD::UNINDEXED, true, SVT, MMO);
+  createOperands(N, Ops);
+
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
+}
+
+SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
+                                      SDValue Base, SDValue Offset,
+                                      ISD::MemIndexedMode AM) {
+  StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+  assert(ST->getOffset().isUndef() && "Store is already an indexed store!");
+  SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+  SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
+  ID.AddInteger(ST->getMemoryVT().getRawBits());
+  ID.AddInteger(ST->getRawSubclassData());
+  ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+    return SDValue(E, 0);
+
+  auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+                                   ST->isTruncatingStore(), ST->getMemoryVT(),
+                                   ST->getMemOperand());
+  createOperands(N, Ops);
+
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
+}
+
+SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
+                                    SDValue Ptr, SDValue Mask, SDValue PassThru,
+                                    EVT MemVT, MachineMemOperand *MMO,
+                                    ISD::LoadExtType ExtTy, bool isExpanding) {
+  SDVTList VTs = getVTList(VT, MVT::Other);
+  SDValue Ops[] = { Chain, Ptr, Mask, PassThru };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
+  ID.AddInteger(MemVT.getRawBits());
+  ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(
+      dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO));
+  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+    cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+                                        ExtTy, isExpanding, MemVT, MMO);
+  createOperands(N, Ops);
+
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
+}
+
+SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
+                                     SDValue Val, SDValue Ptr, SDValue Mask,
+                                     EVT MemVT, MachineMemOperand *MMO,
+                                     bool IsTruncating, bool IsCompressing) {
+ 
assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); + SDVTList VTs = getVTList(MVT::Other); + SDValue Ops[] = { Chain, Val, Ptr, Mask }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); + ID.AddInteger(MemVT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>( + dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + IsTruncating, IsCompressing, MemVT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, + ArrayRef<SDValue> Ops, + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { + assert(Ops.size() == 6 && "Incompatible number of operands"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>( + dl.getIROrder(), VTs, VT, MMO, IndexType)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), + VTs, VT, MMO, IndexType); + createOperands(N, Ops); + + assert(N->getPassThru().getValueType() == N->getValueType(0) && + "Incompatible type of the PassThru value in MaskedGatherSDNode"); + assert(N->getMask().getValueType().getVectorNumElements() == + N->getValueType(0).getVectorNumElements() && + "Vector width mismatch between mask and data"); + assert(N->getIndex().getValueType().getVectorNumElements() >= + N->getValueType(0).getVectorNumElements() && + "Vector width mismatch between index and data"); + assert(isa<ConstantSDNode>(N->getScale()) && + cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + "Scale should be a constant power of 2"); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, + ArrayRef<SDValue> Ops, + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { + assert(Ops.size() == 6 && "Incompatible number of operands"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>( + dl.getIROrder(), VTs, VT, MMO, IndexType)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<MaskedScatterSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), + VTs, VT, MMO, IndexType); + createOperands(N, Ops); + + assert(N->getMask().getValueType().getVectorNumElements() == + N->getValue().getValueType().getVectorNumElements() && + "Vector width mismatch between mask and data"); + assert(N->getIndex().getValueType().getVectorNumElements() >= + N->getValue().getValueType().getVectorNumElements() && + "Vector width mismatch between index and 
data"); + assert(isa<ConstantSDNode>(N->getScale()) && + cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + "Scale should be a constant power of 2"); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) { + // select undef, T, F --> T (if T is a constant), otherwise F + // select, ?, undef, F --> F + // select, ?, T, undef --> T + if (Cond.isUndef()) + return isConstantValueOfAnyType(T) ? T : F; + if (T.isUndef()) + return F; + if (F.isUndef()) + return T; + + // select true, T, F --> T + // select false, T, F --> F + if (auto *CondC = dyn_cast<ConstantSDNode>(Cond)) + return CondC->isNullValue() ? F : T; + + // TODO: This should simplify VSELECT with constant condition using something + // like this (but check boolean contents to be complete?): + // if (ISD::isBuildVectorAllOnes(Cond.getNode())) + // return T; + // if (ISD::isBuildVectorAllZeros(Cond.getNode())) + // return F; + + // select ?, T, T --> T + if (T == F) + return T; + + return SDValue(); +} + +SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) { + // shift undef, Y --> 0 (can always assume that the undef value is 0) + if (X.isUndef()) + return getConstant(0, SDLoc(X.getNode()), X.getValueType()); + // shift X, undef --> undef (because it may shift by the bitwidth) + if (Y.isUndef()) + return getUNDEF(X.getValueType()); + + // shift 0, Y --> 0 + // shift X, 0 --> X + if (isNullOrNullSplat(X) || isNullOrNullSplat(Y)) + return X; + + // shift X, C >= bitwidth(X) --> undef + // All vector elements must be too big (or undef) to avoid partial undefs. + auto isShiftTooBig = [X](ConstantSDNode *Val) { + return !Val || Val->getAPIntValue().uge(X.getScalarValueSizeInBits()); + }; + if (ISD::matchUnaryPredicate(Y, isShiftTooBig, true)) + return getUNDEF(X.getValueType()); + + return SDValue(); +} + +// TODO: Use fast-math-flags to enable more simplifications. +SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y) { + ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true); + if (!YC) + return SDValue(); + + // X + -0.0 --> X + if (Opcode == ISD::FADD) + if (YC->getValueAPF().isNegZero()) + return X; + + // X - +0.0 --> X + if (Opcode == ISD::FSUB) + if (YC->getValueAPF().isPosZero()) + return X; + + // X * 1.0 --> X + // X / 1.0 --> X + if (Opcode == ISD::FMUL || Opcode == ISD::FDIV) + if (YC->getValueAPF().isExactlyValue(1.0)) + return X; + + return SDValue(); +} + +SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, SDValue SV, unsigned Align) { + SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) }; + return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + ArrayRef<SDUse> Ops) { + switch (Ops.size()) { + case 0: return getNode(Opcode, DL, VT); + case 1: return getNode(Opcode, DL, VT, static_cast<const SDValue>(Ops[0])); + case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); + case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); + default: break; + } + + // Copy from an SDUse array into an SDValue array for use with + // the regular getNode logic. 
+  SmallVector<SDValue, 8> NewOps(Ops.begin(), Ops.end());
+  return getNode(Opcode, DL, VT, NewOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+                              ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
+  unsigned NumOps = Ops.size();
+  switch (NumOps) {
+  case 0: return getNode(Opcode, DL, VT);
+  case 1: return getNode(Opcode, DL, VT, Ops[0], Flags);
+  case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
+  case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2], Flags);
+  default: break;
+  }
+
+  switch (Opcode) {
+  default: break;
+  case ISD::BUILD_VECTOR:
+    // Attempt to simplify BUILD_VECTOR.
+    if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+      return V;
+    break;
+  case ISD::CONCAT_VECTORS:
+    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
+      return V;
+    break;
+  case ISD::SELECT_CC:
+    assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+    assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+           "LHS and RHS of condition must have same type!");
+    assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+           "True and False arms of SelectCC must have same type!");
+    assert(Ops[2].getValueType() == VT &&
+           "select_cc node must be of same type as true and false value!");
+    break;
+  case ISD::BR_CC:
+    assert(NumOps == 5 && "BR_CC takes 5 operands!");
+    assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+           "LHS/RHS of comparison should match types!");
+    break;
+  }
+
+  // Memoize nodes.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+
+  if (VT != MVT::Glue) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops);
+    void *IP = nullptr;
+
+    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+      return SDValue(E, 0);
+
+    N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+    createOperands(N, Ops);
+
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+    createOperands(N, Ops);
+  }
+
+  InsertNode(N);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
+                              ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) {
+  return getNode(Opcode, DL, getVTList(ResultTys), Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+                              ArrayRef<SDValue> Ops) {
+  if (VTList.NumVTs == 1)
+    return getNode(Opcode, DL, VTList.VTs[0], Ops);
+
+#if 0
+  switch (Opcode) {
+  // FIXME: figure out how to safely handle things like
+  // int foo(int x) { return 1 << (x & 255); }
+  // int bar() { return foo(256); }
+  case ISD::SRA_PARTS:
+  case ISD::SRL_PARTS:
+  case ISD::SHL_PARTS:
+    if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+        cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+      return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+    else if (N3.getOpcode() == ISD::AND)
+      if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+        // If the and is only masking out bits that cannot affect the shift,
+        // eliminate the and.
+        unsigned NumBits = VT.getScalarSizeInBits()*2;
+        if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+          return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+      }
+    break;
+  }
+#endif
+
+  // Memoize the node unless it returns a flag.
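+  // Glue results model physical-register coupling and must stay distinct per
+  // producer, so glue-producing nodes are never entered into the CSE map.
+  // A rough sketch of the effect for a non-glue multi-result node (VT and
+  // Ops assumed in scope):
+  //   SDVTList VTs = DAG.getVTList(VT, VT); // e.g. for ISD::SMUL_LOHI
+  //   SDValue A = DAG.getNode(ISD::SMUL_LOHI, DL, VTs, Ops);
+  //   SDValue B = DAG.getNode(ISD::SMUL_LOHI, DL, VTs, Ops);
+  //   // A.getNode() == B.getNode(): the second call is a CSE hit.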
+ SDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) + return SDValue(E, 0); + + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); + } else { + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList); + createOperands(N, Ops); + } + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, + SDVTList VTList) { + return getNode(Opcode, DL, VTList, None); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, + SDValue N1) { + SDValue Ops[] = { N1 }; + return getNode(Opcode, DL, VTList, Ops); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, + SDValue N1, SDValue N2) { + SDValue Ops[] = { N1, N2 }; + return getNode(Opcode, DL, VTList, Ops); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3) { + SDValue Ops[] = { N1, N2, N3 }; + return getNode(Opcode, DL, VTList, Ops); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, SDValue N4) { + SDValue Ops[] = { N1, N2, N3, N4 }; + return getNode(Opcode, DL, VTList, Ops); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, SDValue N4, + SDValue N5) { + SDValue Ops[] = { N1, N2, N3, N4, N5 }; + return getNode(Opcode, DL, VTList, Ops); +} + +SDVTList SelectionDAG::getVTList(EVT VT) { + return makeVTList(SDNode::getValueTypeList(VT), 1); +} + +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { + FoldingSetNodeID ID; + ID.AddInteger(2U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + + void *IP = nullptr; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (!Result) { + EVT *Array = Allocator.Allocate<EVT>(2); + Array[0] = VT1; + Array[1] = VT2; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); +} + +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { + FoldingSetNodeID ID; + ID.AddInteger(3U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + ID.AddInteger(VT3.getRawBits()); + + void *IP = nullptr; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (!Result) { + EVT *Array = Allocator.Allocate<EVT>(3); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); +} + +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { + FoldingSetNodeID ID; + ID.AddInteger(4U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + ID.AddInteger(VT3.getRawBits()); + ID.AddInteger(VT4.getRawBits()); + + void *IP = nullptr; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (!Result) { + EVT *Array = Allocator.Allocate<EVT>(4); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Array[3] = VT4; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 4); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); +} + +SDVTList 
SelectionDAG::getVTList(ArrayRef<EVT> VTs) { + unsigned NumVTs = VTs.size(); + FoldingSetNodeID ID; + ID.AddInteger(NumVTs); + for (unsigned index = 0; index < NumVTs; index++) { + ID.AddInteger(VTs[index].getRawBits()); + } + + void *IP = nullptr; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (!Result) { + EVT *Array = Allocator.Allocate<EVT>(NumVTs); + llvm::copy(VTs, Array); + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); +} + + +/// UpdateNodeOperands - *Mutate* the specified node in-place to have the +/// specified operands. If the resultant node already exists in the DAG, +/// this does not modify the specified node, instead it returns the node that +/// already exists. If the resultant node does not exist in the DAG, the +/// input node is returned. As a degenerate case, if you specify the same +/// input operands as the node already has, the input node is returned. +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { + assert(N->getNumOperands() == 1 && "Update with wrong number of operands"); + + // Check to see if there is no change. + if (Op == N->getOperand(0)) return N; + + // See if the modified node already exists. + void *InsertPos = nullptr; + if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos)) + return Existing; + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = nullptr; + + // Now we update the operands. + N->OperandList[0].set(Op); + + updateDivergence(N); + // If this gets put into a CSE map, add it. + if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return N; +} + +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { + assert(N->getNumOperands() == 2 && "Update with wrong number of operands"); + + // Check to see if there is no change. + if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1)) + return N; // No operands changed, just return the input node. + + // See if the modified node already exists. + void *InsertPos = nullptr; + if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos)) + return Existing; + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = nullptr; + + // Now we update the operands. + if (N->OperandList[0] != Op1) + N->OperandList[0].set(Op1); + if (N->OperandList[1] != Op2) + N->OperandList[1].set(Op2); + + updateDivergence(N); + // If this gets put into a CSE map, add it. + if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return N; +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) { + SDValue Ops[] = { Op1, Op2, Op3 }; + return UpdateNodeOperands(N, Ops); +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4) { + SDValue Ops[] = { Op1, Op2, Op3, Op4 }; + return UpdateNodeOperands(N, Ops); +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4, SDValue Op5) { + SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; + return UpdateNodeOperands(N, Ops); +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { + unsigned NumOps = Ops.size(); + assert(N->getNumOperands() == NumOps && + "Update with wrong number of operands"); + + // If no operands changed just return the input node. 
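+  // (Conversely, if the operands do change, the node returned below may not
+  // be N: when the rewritten operand list matches an existing node, that
+  // node is returned instead, so callers must use the result rather than N.)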
+ if (std::equal(Ops.begin(), Ops.end(), N->op_begin())) + return N; + + // See if the modified node already exists. + void *InsertPos = nullptr; + if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, InsertPos)) + return Existing; + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = nullptr; + + // Now we update the operands. + for (unsigned i = 0; i != NumOps; ++i) + if (N->OperandList[i] != Ops[i]) + N->OperandList[i].set(Ops[i]); + + updateDivergence(N); + // If this gets put into a CSE map, add it. + if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return N; +} + +/// DropOperands - Release the operands and set this node to have +/// zero operands. +void SDNode::DropOperands() { + // Unlike the code in MorphNodeTo that does this, we don't need to + // watch for dead nodes here. + for (op_iterator I = op_begin(), E = op_end(); I != E; ) { + SDUse &Use = *I++; + Use.set(SDValue()); + } +} + +void SelectionDAG::setNodeMemRefs(MachineSDNode *N, + ArrayRef<MachineMemOperand *> NewMemRefs) { + if (NewMemRefs.empty()) { + N->clearMemRefs(); + return; + } + + // Check if we can avoid allocating by storing a single reference directly. + if (NewMemRefs.size() == 1) { + N->MemRefs = NewMemRefs[0]; + N->NumMemRefs = 1; + return; + } + + MachineMemOperand **MemRefsBuffer = + Allocator.template Allocate<MachineMemOperand *>(NewMemRefs.size()); + llvm::copy(NewMemRefs, MemRefsBuffer); + N->MemRefs = MemRefsBuffer; + N->NumMemRefs = static_cast<int>(NewMemRefs.size()); +} + +/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a +/// machine opcode. +/// +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT) { + SDVTList VTs = getVTList(VT); + return SelectNodeTo(N, MachineOpc, VTs, None); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT, ArrayRef<SDValue> Ops) { + SDVTList VTs = getVTList(VT); + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, ArrayRef<SDValue> Ops) { + SDVTList VTs = getVTList(VT1, VT2); + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + return SelectNodeTo(N, MachineOpc, VTs, None); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, EVT VT3, + ArrayRef<SDValue> Ops) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode 
*SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                            SDVTList VTs, ArrayRef<SDValue> Ops) {
+  SDNode *New = MorphNodeTo(N, ~MachineOpc, VTs, Ops);
+  // Reset the NodeID to -1.
+  New->setNodeId(-1);
+  if (New != N) {
+    ReplaceAllUsesWith(N, New);
+    RemoveDeadNode(N);
+  }
+  return New;
+}
+
+/// UpdateSDLocOnMergeSDNode - If the opt level is -O0 then it throws away
+/// the line number information on the merged node since it is not possible to
+/// preserve the information that the operation is associated with multiple
+/// lines. This will make the debugger work better at -O0, where there is a
+/// higher probability of having other instructions associated with that line.
+///
+/// For IROrder, we keep the smaller of the two
+SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) {
+  DebugLoc NLoc = N->getDebugLoc();
+  if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {
+    N->setDebugLoc(DebugLoc());
+  }
+  unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder());
+  N->setIROrder(Order);
+  return N;
+}
+
+/// MorphNodeTo - This *mutates* the specified node to have the specified
+/// return type, opcode, and operands.
+///
+/// Note that MorphNodeTo returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one. Note that the SDLoc need not be the same.
+///
+/// Using MorphNodeTo is faster than creating a new node and swapping it in
+/// with ReplaceAllUsesWith both because it often avoids allocating a new
+/// node, and because it doesn't require CSE recalculation for any of
+/// the node's users.
+///
+/// However, note that MorphNodeTo recursively deletes dead nodes from the DAG.
+/// As a consequence it isn't appropriate to use from within the DAG combiner or
+/// the legalizer which maintain worklists that would need to be updated when
+/// deleting things.
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+                                  SDVTList VTs, ArrayRef<SDValue> Ops) {
+  // If an identical node already exists, use it.
+  void *IP = nullptr;
+  if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opc, VTs, Ops);
+    if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP))
+      return UpdateSDLocOnMergeSDNode(ON, SDLoc(N));
+  }
+
+  if (!RemoveNodeFromCSEMaps(N))
+    IP = nullptr;
+
+  // Start the morphing.
+  N->NodeType = Opc;
+  N->ValueList = VTs.VTs;
+  N->NumValues = VTs.NumVTs;
+
+  // Clear the operands list, updating used nodes to remove this from their
+  // use list. Keep track of any operands that become dead as a result.
+  SmallPtrSet<SDNode*, 16> DeadNodeSet;
+  for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+    SDUse &Use = *I++;
+    SDNode *Used = Use.getNode();
+    Use.set(SDValue());
+    if (Used->use_empty())
+      DeadNodeSet.insert(Used);
+  }
+
+  // For MachineNode, initialize the memory references information.
+  if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N))
+    MN->clearMemRefs();
+
+  // Swap for an appropriately sized array from the recycler.
+  removeOperands(N);
+  createOperands(N, Ops);
+
+  // Delete any nodes that are still dead after adding the uses for the
+  // new operands.
+  if (!DeadNodeSet.empty()) {
+    SmallVector<SDNode *, 16> DeadNodes;
+    for (SDNode *N : DeadNodeSet)
+      if (N->use_empty())
+        DeadNodes.push_back(N);
+    RemoveDeadNodes(DeadNodes);
+  }
+
+  if (IP)
+    CSEMap.InsertNode(N, IP); // Memoize the new node.
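+  // (IP is null here either because the result list ends in Glue or because
+  // N was not present in the CSE maps to begin with; in both cases the
+  // morphed node is deliberately left unmemoized.)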
+ return N; +} + +SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { + unsigned OrigOpc = Node->getOpcode(); + unsigned NewOpc; + switch (OrigOpc) { + default: + llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); + case ISD::STRICT_FADD: NewOpc = ISD::FADD; break; + case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break; + case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break; + case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break; + case ISD::STRICT_FREM: NewOpc = ISD::FREM; break; + case ISD::STRICT_FMA: NewOpc = ISD::FMA; break; + case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; break; + case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break; + case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break; + case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; break; + case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; break; + case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; break; + case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; break; + case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break; + case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break; + case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break; + case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break; + case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break; + case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break; + case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break; + case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break; + case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break; + case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break; + case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break; + case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break; + case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break; + case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break; + case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break; + case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break; + case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break; + case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break; + case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break; + } + + assert(Node->getNumValues() == 2 && "Unexpected number of results!"); + + // We're taking this node out of the chain, so we need to re-link things. + SDValue InputChain = Node->getOperand(0); + SDValue OutputChain = SDValue(Node, 1); + ReplaceAllUsesOfValueWith(OutputChain, InputChain); + + SmallVector<SDValue, 3> Ops; + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) + Ops.push_back(Node->getOperand(i)); + + SDVTList VTs = getVTList(Node->getValueType(0)); + SDNode *Res = MorphNodeTo(Node, NewOpc, VTs, Ops); + + // MorphNodeTo can operate in two ways: if an existing node with the + // specified operands exists, it can just return it. Otherwise, it + // updates the node in place to have the requested operands. + if (Res == Node) { + // If we updated the node in place, reset the node ID. To the isel, + // this should be just like a newly allocated machine node. + Res->setNodeId(-1); + } else { + ReplaceAllUsesWith(Node, Res); + RemoveDeadNode(Node); + } + + return Res; +} + +/// getMachineNode - These are used for target selectors to create a new node +/// with specified return type(s), MachineInstr opcode, and operands. +/// +/// Note that getMachineNode returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. 
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT) { + SDVTList VTs = getVTList(VT); + return getMachineNode(Opcode, dl, VTs, None); +} + +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT, SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT, SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT, ArrayRef<SDValue> Ops) { + SDVTList VTs = getVTList(VT); + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, + ArrayRef<SDValue> Ops) { + SDVTList VTs = getVTList(VT1, VT2); + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, EVT VT3, + ArrayRef<SDValue> Ops) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + ArrayRef<EVT> ResultTys, + ArrayRef<SDValue> Ops) { + SDVTList VTs = getVTList(ResultTys); + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL, + SDVTList VTs, + ArrayRef<SDValue> Ops) { + bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; + MachineSDNode *N; + void *IP = nullptr; + + if (DoCSE) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ~Opcode, VTs, Ops); + IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { + return cast<MachineSDNode>(UpdateSDLocOnMergeSDNode(E, DL)); + } + } + + // Allocate a new MachineSDNode. 
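+  // Machine opcodes are stored bitwise-complemented (~Opcode), so they occupy
+  // a range disjoint from the ISD::* opcodes; SDNode::isMachineOpcode() keys
+  // off this by checking for a negative NodeType.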
+ N = newSDNode<MachineSDNode>(~Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); + + if (DoCSE) + CSEMap.InsertNode(N, IP); + + InsertNode(N); + NewSDValueDbgMsg(SDValue(N, 0), "Creating new machine node: ", this); + return N; +} + +/// getTargetExtractSubreg - A convenience function for creating +/// TargetOpcode::EXTRACT_SUBREG nodes. +SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, + SDValue Operand) { + SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); + SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + VT, Operand, SRIdxVal); + return SDValue(Subreg, 0); +} + +/// getTargetInsertSubreg - A convenience function for creating +/// TargetOpcode::INSERT_SUBREG nodes. +SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, + SDValue Operand, SDValue Subreg) { + SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); + SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, + VT, Operand, Subreg, SRIdxVal); + return SDValue(Result, 0); +} + +/// getNodeIfExists - Get the specified node if it's already available, or +/// else return NULL. +SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, + ArrayRef<SDValue> Ops, + const SDNodeFlags Flags) { + if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) { + E->intersectFlagsWith(Flags); + return E; + } + } + return nullptr; +} + +/// getDbgValue - Creates a SDDbgValue node. +/// +/// SDNode +SDDbgValue *SelectionDAG::getDbgValue(DIVariable *Var, DIExpression *Expr, + SDNode *N, unsigned R, bool IsIndirect, + const DebugLoc &DL, unsigned O) { + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) + SDDbgValue(Var, Expr, N, R, IsIndirect, DL, O); +} + +/// Constant +SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var, + DIExpression *Expr, + const Value *C, + const DebugLoc &DL, unsigned O) { + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, DL, O); +} + +/// FrameIndex +SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var, + DIExpression *Expr, unsigned FI, + bool IsIndirect, + const DebugLoc &DL, + unsigned O) { + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) + SDDbgValue(Var, Expr, FI, IsIndirect, DL, O, SDDbgValue::FRAMEIX); +} + +/// VReg +SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var, + DIExpression *Expr, + unsigned VReg, bool IsIndirect, + const DebugLoc &DL, unsigned O) { + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) + SDDbgValue(Var, Expr, VReg, IsIndirect, DL, O, SDDbgValue::VREG); +} + +void SelectionDAG::transferDbgValues(SDValue From, SDValue To, + unsigned OffsetInBits, unsigned SizeInBits, + bool InvalidateDbg) { + SDNode *FromNode = From.getNode(); + SDNode *ToNode = To.getNode(); + assert(FromNode && ToNode && "Can't modify dbg values"); + + // PR35338 + // TODO: assert(From != To && "Redundant dbg value transfer"); + // TODO: assert(FromNode != ToNode && "Intranode dbg value transfer"); + if (From == To || 
FromNode == ToNode) + return; + + if (!FromNode->getHasDebugValue()) + return; + + SmallVector<SDDbgValue *, 2> ClonedDVs; + for (SDDbgValue *Dbg : GetDbgValues(FromNode)) { + if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated()) + continue; + + // TODO: assert(!Dbg->isInvalidated() && "Transfer of invalid dbg value"); + + // Just transfer the dbg value attached to From. + if (Dbg->getResNo() != From.getResNo()) + continue; + + DIVariable *Var = Dbg->getVariable(); + auto *Expr = Dbg->getExpression(); + // If a fragment is requested, update the expression. + if (SizeInBits) { + // When splitting a larger (e.g., sign-extended) value whose + // lower bits are described with an SDDbgValue, do not attempt + // to transfer the SDDbgValue to the upper bits. + if (auto FI = Expr->getFragmentInfo()) + if (OffsetInBits + SizeInBits > FI->SizeInBits) + continue; + auto Fragment = DIExpression::createFragmentExpression(Expr, OffsetInBits, + SizeInBits); + if (!Fragment) + continue; + Expr = *Fragment; + } + // Clone the SDDbgValue and move it to To. + SDDbgValue *Clone = + getDbgValue(Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(), + Dbg->getDebugLoc(), Dbg->getOrder()); + ClonedDVs.push_back(Clone); + + if (InvalidateDbg) { + // Invalidate value and indicate the SDDbgValue should not be emitted. + Dbg->setIsInvalidated(); + Dbg->setIsEmitted(); + } + } + + for (SDDbgValue *Dbg : ClonedDVs) + AddDbgValue(Dbg, ToNode, false); +} + +void SelectionDAG::salvageDebugInfo(SDNode &N) { + if (!N.getHasDebugValue()) + return; + + SmallVector<SDDbgValue *, 2> ClonedDVs; + for (auto DV : GetDbgValues(&N)) { + if (DV->isInvalidated()) + continue; + switch (N.getOpcode()) { + default: + break; + case ISD::ADD: + SDValue N0 = N.getOperand(0); + SDValue N1 = N.getOperand(1); + if (!isConstantIntBuildVectorOrConstantInt(N0) && + isConstantIntBuildVectorOrConstantInt(N1)) { + uint64_t Offset = N.getConstantOperandVal(1); + // Rewrite an ADD constant node into a DIExpression. Since we are + // performing arithmetic to compute the variable's *value* in the + // DIExpression, we need to mark the expression with a + // DW_OP_stack_value. + auto *DIExpr = DV->getExpression(); + DIExpr = + DIExpression::prepend(DIExpr, DIExpression::StackValue, Offset); + SDDbgValue *Clone = + getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(), + DV->isIndirect(), DV->getDebugLoc(), DV->getOrder()); + ClonedDVs.push_back(Clone); + DV->setIsInvalidated(); + DV->setIsEmitted(); + LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; + N0.getNode()->dumprFull(this); + dbgs() << " into " << *DIExpr << '\n'); + } + } + } + + for (SDDbgValue *Dbg : ClonedDVs) + AddDbgValue(Dbg, Dbg->getSDNode(), false); +} + +/// Creates a SDDbgLabel node. +SDDbgLabel *SelectionDAG::getDbgLabel(DILabel *Label, + const DebugLoc &DL, unsigned O) { + assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) SDDbgLabel(Label, DL, O); +} + +namespace { + +/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node +/// pointed to by a use iterator is deleted, increment the use iterator +/// so that it doesn't dangle. +/// +class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { + SDNode::use_iterator &UI; + SDNode::use_iterator &UE; + + void NodeDeleted(SDNode *N, SDNode *E) override { + // Increment the iterator as needed. 
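+    // (Advance past any remaining uses whose user is the node being
+    // deleted, so the RAUW loop never dereferences a dangling SDUse.)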
+    while (UI != UE && N == *UI)
+      ++UI;
+  }
+
+public:
+  RAUWUpdateListener(SelectionDAG &d,
+                     SDNode::use_iterator &ui,
+                     SDNode::use_iterator &ue)
+    : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
+};
+
+} // end anonymous namespace
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From has a single result value.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
+  SDNode *From = FromN.getNode();
+  assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
+         "Cannot replace with this method!");
+  assert(From != To.getNode() && "Cannot replace uses of value with itself");
+
+  // Preserve Debug Values
+  transferDbgValues(FromN, To);
+
+  // Iterate over all the existing uses of From. New uses will be added
+  // to the beginning of the use list, which we avoid visiting.
+  // This specifically avoids visiting uses of From that arise while the
+  // replacement is happening, because any such uses would be the result
+  // of CSE: If an existing node looks like From after one of its operands
+  // is replaced by To, we don't want to replace all of its users with To
+  // too. See PR3018 for more info.
+  SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  RAUWUpdateListener Listener(*this, UI, UE);
+  while (UI != UE) {
+    SDNode *User = *UI;
+
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(User);
+
+    // A user can appear in a use list multiple times, and when this
+    // happens the uses are usually next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      SDUse &Use = UI.getUse();
+      ++UI;
+      Use.set(To);
+      if (To->isDivergent() != From->isDivergent())
+        updateDivergence(User);
+    } while (UI != UE && *UI == User);
+    // Now that we have modified User, add it back to the CSE maps. If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User);
+  }
+
+  // If we just RAUW'd the root, take note.
+  if (FromN == getRoot())
+    setRoot(To);
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes that for each value of From, there is a
+/// corresponding value in To in the same position with the same type.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
+#ifndef NDEBUG
+  for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+    assert((!From->hasAnyUseOfValue(i) ||
+            From->getValueType(i) == To->getValueType(i)) &&
+           "Cannot use this version of ReplaceAllUsesWith!");
+#endif
+
+  // Handle the trivial case.
+  if (From == To)
+    return;
+
+  // Preserve Debug Info. Only do this if there's a use.
+  for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+    if (From->hasAnyUseOfValue(i)) {
+      assert((i < To->getNumValues()) && "Invalid To location");
+      transferDbgValues(SDValue(From, i), SDValue(To, i));
+    }
+
+  // Iterate over just the existing users of From. See the comments in
+  // the ReplaceAllUsesWith above.
+  SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  RAUWUpdateListener Listener(*this, UI, UE);
+  while (UI != UE) {
+    SDNode *User = *UI;
+
+    // This node is about to morph, remove its old self from the CSE maps.
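+    // (The CSE hash covers the operand list, so User must be taken out of
+    // the maps before its operands are rewritten and re-added afterwards,
+    // which may recursively merge it with an existing identical node.)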
+ RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + ++UI; + Use.setNode(To); + if (To->isDivergent() != From->isDivergent()) + updateDivergence(User); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User); + } + + // If we just RAUW'd the root, take note. + if (From == getRoot().getNode()) + setRoot(SDValue(To, getRoot().getResNo())); +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version can replace From with any result values. To must match the +/// number and types of values returned by From. +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { + if (From->getNumValues() == 1) // Handle the simple case efficiently. + return ReplaceAllUsesWith(SDValue(From, 0), To[0]); + + // Preserve Debug Info. + for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) + transferDbgValues(SDValue(From, i), To[i]); + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(*this, UI, UE); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this happens the + // uses are usually next to each other in the list. To help reduce the + // number of CSE and divergence recomputations, process all the uses of this + // user that we can find this way. + bool To_IsDivergent = false; + do { + SDUse &Use = UI.getUse(); + const SDValue &ToOp = To[Use.getResNo()]; + ++UI; + Use.set(ToOp); + To_IsDivergent |= ToOp->isDivergent(); + } while (UI != UE && *UI == User); + + if (To_IsDivergent != From->isDivergent()) + updateDivergence(User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User); + } + + // If we just RAUW'd the root, take note. + if (From == getRoot().getNode()) + setRoot(SDValue(To[getRoot().getResNo()])); +} + +/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving +/// uses of other values produced by From.getNode() alone. The Deleted +/// vector is handled the same way as for ReplaceAllUsesWith. +void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ + // Handle the really simple, really trivial case efficiently. + if (From == To) return; + + // Handle the simple, trivial, case efficiently. + if (From.getNode()->getNumValues() == 1) { + ReplaceAllUsesWith(From, To); + return; + } + + // Preserve Debug Info. + transferDbgValues(From, To); + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. 
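+  // (Unlike the whole-node variants above, only uses whose getResNo()
+  // matches From.getResNo() are rewritten; uses of the node's other result
+  // values are deliberately skipped inside the loop below.)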
+ SDNode::use_iterator UI = From.getNode()->use_begin(), + UE = From.getNode()->use_end(); + RAUWUpdateListener Listener(*this, UI, UE); + while (UI != UE) { + SDNode *User = *UI; + bool UserRemovedFromCSEMaps = false; + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + + // Skip uses of different values from the same node. + if (Use.getResNo() != From.getResNo()) { + ++UI; + continue; + } + + // If this node hasn't been modified yet, it's still in the CSE maps, + // so remove its old self from the CSE maps. + if (!UserRemovedFromCSEMaps) { + RemoveNodeFromCSEMaps(User); + UserRemovedFromCSEMaps = true; + } + + ++UI; + Use.set(To); + if (To->isDivergent() != From->isDivergent()) + updateDivergence(User); + } while (UI != UE && *UI == User); + // We are iterating over all uses of the From node, so if a use + // doesn't use the specific value, no changes are made. + if (!UserRemovedFromCSEMaps) + continue; + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User); + } + + // If we just RAUW'd the root, take note. + if (From == getRoot()) + setRoot(To); +} + +namespace { + + /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith + /// to record information about a use. + struct UseMemo { + SDNode *User; + unsigned Index; + SDUse *Use; + }; + + /// operator< - Sort Memos by User. + bool operator<(const UseMemo &L, const UseMemo &R) { + return (intptr_t)L.User < (intptr_t)R.User; + } + +} // end anonymous namespace + +void SelectionDAG::updateDivergence(SDNode * N) +{ + if (TLI->isSDNodeAlwaysUniform(N)) + return; + bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA); + for (auto &Op : N->ops()) { + if (Op.Val.getValueType() != MVT::Other) + IsDivergent |= Op.getNode()->isDivergent(); + } + if (N->SDNodeBits.IsDivergent != IsDivergent) { + N->SDNodeBits.IsDivergent = IsDivergent; + for (auto U : N->uses()) { + updateDivergence(U); + } + } +} + +void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) { + DenseMap<SDNode *, unsigned> Degree; + Order.reserve(AllNodes.size()); + for (auto &N : allnodes()) { + unsigned NOps = N.getNumOperands(); + Degree[&N] = NOps; + if (0 == NOps) + Order.push_back(&N); + } + for (size_t I = 0; I != Order.size(); ++I) { + SDNode *N = Order[I]; + for (auto U : N->uses()) { + unsigned &UnsortedOps = Degree[U]; + if (0 == --UnsortedOps) + Order.push_back(U); + } + } +} + +#ifndef NDEBUG +void SelectionDAG::VerifyDAGDiverence() { + std::vector<SDNode *> TopoOrder; + CreateTopologicalOrder(TopoOrder); + const TargetLowering &TLI = getTargetLoweringInfo(); + DenseMap<const SDNode *, bool> DivergenceMap; + for (auto &N : allnodes()) { + DivergenceMap[&N] = false; + } + for (auto N : TopoOrder) { + bool IsDivergent = DivergenceMap[N]; + bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA); + for (auto &Op : N->ops()) { + if (Op.Val.getValueType() != MVT::Other) + IsSDNodeDivergent |= DivergenceMap[Op.getNode()]; + } + if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) { + DivergenceMap[N] = true; + } + } + for (auto &N : allnodes()) { + (void)N; + assert(DivergenceMap[&N] == N.isDivergent() && + "Divergence bit inconsistency detected\n"); 
+  }
+}
+#endif
+
+/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The same value
+/// may appear in both the From and To list.
+void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
+                                              const SDValue *To,
+                                              unsigned Num) {
+  // Handle the simple, trivial case efficiently.
+  if (Num == 1)
+    return ReplaceAllUsesOfValueWith(*From, *To);
+
+  transferDbgValues(*From, *To);
+
+  // Record all the uses up front. This helps to cope with new uses
+  // that are introduced during the replacement process.
+  SmallVector<UseMemo, 4> Uses;
+  for (unsigned i = 0; i != Num; ++i) {
+    unsigned FromResNo = From[i].getResNo();
+    SDNode *FromNode = From[i].getNode();
+    for (SDNode::use_iterator UI = FromNode->use_begin(),
+         E = FromNode->use_end(); UI != E; ++UI) {
+      SDUse &Use = UI.getUse();
+      if (Use.getResNo() == FromResNo) {
+        UseMemo Memo = { *UI, i, &Use };
+        Uses.push_back(Memo);
+      }
+    }
+  }
+
+  // Sort the uses, so that all the uses from a given User are together.
+  llvm::sort(Uses);
+
+  for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
+       UseIndex != UseIndexEnd; ) {
+    // We know that this user uses some value of From. If it is the right
+    // value, update it.
+    SDNode *User = Uses[UseIndex].User;
+
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(User);
+
+    // The Uses array is sorted, so all the uses for a given User
+    // are next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      unsigned i = Uses[UseIndex].Index;
+      SDUse &Use = *Uses[UseIndex].Use;
+      ++UseIndex;
+
+      Use.set(To[i]);
+    } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
+
+    // Now that we have modified User, add it back to the CSE maps. If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User);
+  }
+}
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on their topological order. It returns the maximum node id.
+unsigned SelectionDAG::AssignTopologicalOrder() {
+  unsigned DAGSize = 0;
+
+  // SortedPos tracks the progress of the algorithm. Nodes before it are
+  // sorted, nodes after it are unsorted. When the algorithm completes
+  // it is at the end of the list.
+  allnodes_iterator SortedPos = allnodes_begin();
+
+  // Visit all the nodes. Move nodes with no operands to the front of
+  // the list immediately. Annotate nodes that do have operands with their
+  // operand count. Before we do this, the Node Id fields of the nodes
+  // may contain arbitrary values. After, the Node Id fields for nodes
+  // before SortedPos will contain the topological sort index, and the
+  // Node Id fields for nodes at SortedPos and after will contain the
+  // count of outstanding operands.
+  for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
+    SDNode *N = &*I++;
+    checkForCycles(N, this);
+    unsigned Degree = N->getNumOperands();
+    if (Degree == 0) {
+      // A node with no operands: add it to the result array immediately.
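+      // Illustrative walk-through with hypothetical nodes: for
+      //   t0: EntryToken,  t1: Constant<0>,  t2: add t1, t1
+      // this pass moves t0 and t1 (zero operands) to the front immediately
+      // and stores t2's operand count (2) in its Node Id scratch space; the
+      // second pass below decrements that count as operands are placed and
+      // emplaces t2 once it reaches zero.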
+      N->setNodeId(DAGSize++);
+      allnodes_iterator Q(N);
+      if (Q != SortedPos)
+        SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
+      assert(SortedPos != AllNodes.end() && "Overran node list");
+      ++SortedPos;
+    } else {
+      // Temporarily use the Node Id as scratch space for the degree count.
+      N->setNodeId(Degree);
+    }
+  }
+
+  // Visit all the nodes. As we iterate, move nodes into sorted order,
+  // such that by the time the end is reached all nodes will be sorted.
+  for (SDNode &Node : allnodes()) {
+    SDNode *N = &Node;
+    checkForCycles(N, this);
+    // N is in sorted position, so each of its users now has one less
+    // unsorted operand.
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+         UI != UE; ++UI) {
+      SDNode *P = *UI;
+      unsigned Degree = P->getNodeId();
+      assert(Degree != 0 && "Invalid node degree");
+      --Degree;
+      if (Degree == 0) {
+        // All of P's operands are sorted, so P may be sorted now.
+        P->setNodeId(DAGSize++);
+        if (P->getIterator() != SortedPos)
+          SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
+        assert(SortedPos != AllNodes.end() && "Overran node list");
+        ++SortedPos;
+      } else {
+        // Update P's outstanding operand count.
+        P->setNodeId(Degree);
+      }
+    }
+    if (Node.getIterator() == SortedPos) {
+#ifndef NDEBUG
+      allnodes_iterator I(N);
+      SDNode *S = &*++I;
+      dbgs() << "Overran sorted position:\n";
+      S->dumprFull(this); dbgs() << "\n";
+      dbgs() << "Checking if this is due to cycles\n";
+      checkForCycles(this, true);
+#endif
+      llvm_unreachable(nullptr);
+    }
+  }
+
+  assert(SortedPos == AllNodes.end() &&
+         "Topological sort incomplete!");
+  assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
+         "First node in topological sort is not the entry token!");
+  assert(AllNodes.front().getNodeId() == 0 &&
+         "First node in topological sort has non-zero id!");
+  assert(AllNodes.front().getNumOperands() == 0 &&
+         "First node in topological sort has operands!");
+  assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
+         "Last node in topological sort has unexpected id!");
+  assert(AllNodes.back().use_empty() &&
+         "Last node in topological sort has users!");
+  assert(DAGSize == allnodes_size() && "Node count mismatch!");
+  return DAGSize;
+}
+
+/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
+/// value is produced by SD.
+void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
+  if (SD) {
+    assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue());
+    SD->setHasDebugValue(true);
+  }
+  DbgInfo->add(DB, SD, isParameter);
+}
+
+void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) {
+  DbgInfo->add(DB);
+}
+
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+                                                   SDValue NewMemOp) {
+  assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
+  // The new memory operation must have the same position as the old load in
+  // terms of memory dependency. Create a TokenFactor for the old load and the
+  // new memory operation and update uses of the old load's output chain to
+  // use that TokenFactor.
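+  // Illustrative example: if the old load's chain previously fed a store,
+  //   (ld.ch) -> store
+  // then after this call the store hangs off a TokenFactor instead,
+  //   TokenFactor(ld.ch, newmemop.ch) -> store
+  // so it is still ordered after both the old load and the new operation.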
+ SDValue OldChain = SDValue(OldLoad, 1); + SDValue NewChain = SDValue(NewMemOp.getNode(), 1); + if (OldChain == NewChain || !OldLoad->hasAnyUseOfValue(1)) + return NewChain; + + SDValue TokenFactor = + getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain); + ReplaceAllUsesOfValueWith(OldChain, TokenFactor); + UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain); + return TokenFactor; +} + +SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op, + Function **OutFunction) { + assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol"); + + auto *Symbol = cast<ExternalSymbolSDNode>(Op)->getSymbol(); + auto *Module = MF->getFunction().getParent(); + auto *Function = Module->getFunction(Symbol); + + if (OutFunction != nullptr) + *OutFunction = Function; + + if (Function != nullptr) { + auto PtrTy = TLI->getPointerTy(getDataLayout(), Function->getAddressSpace()); + return getGlobalAddress(Function, SDLoc(Op), PtrTy); + } + + std::string ErrorStr; + raw_string_ostream ErrorFormatter(ErrorStr); + + ErrorFormatter << "Undefined external symbol "; + ErrorFormatter << '"' << Symbol << '"'; + ErrorFormatter.flush(); + + report_fatal_error(ErrorStr); +} + +//===----------------------------------------------------------------------===// +// SDNode Class +//===----------------------------------------------------------------------===// + +bool llvm::isNullConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isNullValue(); +} + +bool llvm::isNullFPConstant(SDValue V) { + ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); + return Const != nullptr && Const->isZero() && !Const->isNegative(); +} + +bool llvm::isAllOnesConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isAllOnesValue(); +} + +bool llvm::isOneConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isOne(); +} + +SDValue llvm::peekThroughBitcasts(SDValue V) { + while (V.getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + return V; +} + +SDValue llvm::peekThroughOneUseBitcasts(SDValue V) { + while (V.getOpcode() == ISD::BITCAST && V.getOperand(0).hasOneUse()) + V = V.getOperand(0); + return V; +} + +SDValue llvm::peekThroughExtractSubvectors(SDValue V) { + while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) + V = V.getOperand(0); + return V; +} + +bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) { + if (V.getOpcode() != ISD::XOR) + return false; + V = peekThroughBitcasts(V.getOperand(1)); + unsigned NumBits = V.getScalarValueSizeInBits(); + ConstantSDNode *C = + isConstOrConstSplat(V, AllowUndefs, /*AllowTruncation*/ true); + return C && (C->getAPIntValue().countTrailingOnes() >= NumBits); +} + +ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs, + bool AllowTruncation) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { + BitVector UndefElements; + ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); + + // BuildVectors can truncate their operands. Ignore that case here unless + // AllowTruncation is set. 
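+    // e.g. (hypothetical) a v4i8 (build_vector (i16 3), (i16 3), (i16 3),
+    // (i16 3)) is a truncating splat: CVT is i16 while the vector's scalar
+    // type is i8, so it is only reported when AllowTruncation is set.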
+ if (CN && (UndefElements.none() || AllowUndefs)) { + EVT CVT = CN->getValueType(0); + EVT NSVT = N.getValueType().getScalarType(); + assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension"); + if (AllowTruncation || (CVT == NSVT)) + return CN; + } + } + + return nullptr; +} + +ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts, + bool AllowUndefs, + bool AllowTruncation) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { + BitVector UndefElements; + ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements); + + // BuildVectors can truncate their operands. Ignore that case here unless + // AllowTruncation is set. + if (CN && (UndefElements.none() || AllowUndefs)) { + EVT CVT = CN->getValueType(0); + EVT NSVT = N.getValueType().getScalarType(); + assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension"); + if (AllowTruncation || (CVT == NSVT)) + return CN; + } + } + + return nullptr; +} + +ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) { + if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { + BitVector UndefElements; + ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); + if (CN && (UndefElements.none() || AllowUndefs)) + return CN; + } + + return nullptr; +} + +ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, + const APInt &DemandedElts, + bool AllowUndefs) { + if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { + BitVector UndefElements; + ConstantFPSDNode *CN = + BV->getConstantFPSplatNode(DemandedElts, &UndefElements); + if (CN && (UndefElements.none() || AllowUndefs)) + return CN; + } + + return nullptr; +} + +bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) { + // TODO: may want to use peekThroughBitcast() here. + ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs); + return C && C->isNullValue(); +} + +bool llvm::isOneOrOneSplat(SDValue N) { + // TODO: may want to use peekThroughBitcast() here. 
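+  // The width check below additionally requires the matched constant to be
+  // exactly as wide as the splatted element, so e.g. (hypothetical) a one
+  // built from wider, implicitly truncated operands is not accepted.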
+  unsigned BitWidth = N.getScalarValueSizeInBits();
+  ConstantSDNode *C = isConstOrConstSplat(N);
+  return C && C->isOne() && C->getValueSizeInBits(0) == BitWidth;
+}
+
+bool llvm::isAllOnesOrAllOnesSplat(SDValue N) {
+  N = peekThroughBitcasts(N);
+  unsigned BitWidth = N.getScalarValueSizeInBits();
+  ConstantSDNode *C = isConstOrConstSplat(N);
+  return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth;
+}
+
+HandleSDNode::~HandleSDNode() {
+  DropOperands();
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order,
+                                         const DebugLoc &DL,
+                                         const GlobalValue *GA, EVT VT,
+                                         int64_t o, unsigned TF)
+    : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
+  TheGlobal = GA;
+}
+
+AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl,
+                                         EVT VT, unsigned SrcAS,
+                                         unsigned DestAS)
+    : SDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT)),
+      SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {}
+
+MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
+                     SDVTList VTs, EVT memvt, MachineMemOperand *mmo)
+    : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) {
+  MemSDNodeBits.IsVolatile = MMO->isVolatile();
+  MemSDNodeBits.IsNonTemporal = MMO->isNonTemporal();
+  MemSDNodeBits.IsDereferenceable = MMO->isDereferenceable();
+  MemSDNodeBits.IsInvariant = MMO->isInvariant();
+
+  // We check here that the size of the memory operand fits within the size of
+  // the MMO. This is because the MMO might indicate only a possible address
+  // range instead of specifying the affected memory addresses precisely.
+  assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!");
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) const {
+  AddNodeIDNode(ID, this);
+}
+
+namespace {
+
+  struct EVTArray {
+    std::vector<EVT> VTs;
+
+    EVTArray() {
+      VTs.reserve(MVT::LAST_VALUETYPE);
+      for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
+        VTs.push_back(MVT((MVT::SimpleValueType)i));
+    }
+  };
+
+} // end anonymous namespace
+
+static ManagedStatic<std::set<EVT, EVT::compareRawBits>> EVTs;
+static ManagedStatic<EVTArray> SimpleVTArray;
+static ManagedStatic<sys::SmartMutex<true>> VTMutex;
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+const EVT *SDNode::getValueTypeList(EVT VT) {
+  if (VT.isExtended()) {
+    sys::SmartScopedLock<true> Lock(*VTMutex);
+    return &(*EVTs->insert(VT).first);
+  } else {
+    assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+           "Value type out of range!");
+    return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
+  }
+}
+
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value. This method ignores uses of other values defined by this
+/// operation.
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+  assert(Value < getNumValues() && "Bad value!");
+
+  // TODO: Only iterate over uses of a given value of the node
+  for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+    if (UI.getUse().getResNo() == Value) {
+      if (NUses == 0)
+        return false;
+      --NUses;
+    }
+  }
+
+  // Found exactly the right number of uses?
+  return NUses == 0;
+}
+
+/// hasAnyUseOfValue - Return true if there is any use of the indicated
+/// value. This method ignores uses of other values defined by this operation.
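+///
+/// For example (illustrative), for a load producing (i32, ch), querying
+/// value 1 asks whether the output chain has any users at all.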
+bool SDNode::hasAnyUseOfValue(unsigned Value) const {
+  assert(Value < getNumValues() && "Bad value!");
+
+  for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
+    if (UI.getUse().getResNo() == Value)
+      return true;
+
+  return false;
+}
+
+/// isOnlyUserOf - Return true if this node is the only user of N.
+bool SDNode::isOnlyUserOf(const SDNode *N) const {
+  bool Seen = false;
+  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+    SDNode *User = *I;
+    if (User == this)
+      Seen = true;
+    else
+      return false;
+  }
+
+  return Seen;
+}
+
+/// Return true if the only users of N are contained in Nodes.
+bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
+  bool Seen = false;
+  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+    SDNode *User = *I;
+    if (llvm::any_of(Nodes,
+                     [&User](const SDNode *Node) { return User == Node; }))
+      Seen = true;
+    else
+      return false;
+  }
+
+  return Seen;
+}
+
+/// isOperandOf - Return true if this node is an operand of N.
+bool SDValue::isOperandOf(const SDNode *N) const {
+  return any_of(N->op_values(), [this](SDValue Op) { return *this == Op; });
+}
+
+bool SDNode::isOperandOf(const SDNode *N) const {
+  return any_of(N->op_values(),
+                [this](SDValue Op) { return this == Op.getNode(); });
+}
+
+/// reachesChainWithoutSideEffects - Return true if this operand (which must
+/// be a chain) reaches the specified operand without crossing any
+/// side-effecting instructions on any chain path. In practice, this looks
+/// through token factors and non-volatile loads. In order to remain efficient,
+/// this only looks a couple of nodes in; it does not do an exhaustive search.
+///
+/// Note that we only need to examine chains when we're searching for
+/// side-effects; SelectionDAG requires that all side-effects are represented
+/// by chains, even if another operand would force a specific ordering. This
+/// constraint is necessary to allow transformations like splitting loads.
+bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
+                                             unsigned Depth) const {
+  if (*this == Dest) return true;
+
+  // Don't search too deeply, we just want to be able to see through
+  // TokenFactor's etc.
+  if (Depth == 0) return false;
+
+  // If this is a token factor, all inputs to the TF happen in parallel.
+  if (getOpcode() == ISD::TokenFactor) {
+    // First, try a shallow search.
+    if (is_contained((*this)->ops(), Dest)) {
+      // We found the chain we want as an operand of this TokenFactor.
+      // Essentially, we reach the chain without side-effects if we could
+      // serialize the TokenFactor into a simple chain of operations with
+      // Dest as the last operation. This is automatically true if the
+      // chain has one use: there are no other ordering constraints.
+      // If the chain has more than one use, we give up: some other
+      // use of Dest might force a side-effect between Dest and the current
+      // node.
+      if (Dest.hasOneUse())
+        return true;
+    }
+    // Next, try a deep search: check whether every operand of the TokenFactor
+    // reaches Dest.
+    return llvm::all_of((*this)->ops(), [=](SDValue Op) {
+      return Op.reachesChainWithoutSideEffects(Dest, Depth - 1);
+    });
+  }
+
+  // Unordered loads don't have side effects, so look through them.
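+  // e.g. (illustrative) for the chain  Dest -> (unordered ld) -> *this,
+  // the recursion below steps through the load's input chain and can still
+  // reach Dest within the remaining Depth budget.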
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) { + if (Ld->isUnordered()) + return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1); + } + return false; +} + +bool SDNode::hasPredecessor(const SDNode *N) const { + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + Worklist.push_back(this); + return hasPredecessorHelper(N, Visited, Worklist); +} + +void SDNode::intersectFlagsWith(const SDNodeFlags Flags) { + this->Flags.intersectWith(Flags); +} + +SDValue +SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, + ArrayRef<ISD::NodeType> CandidateBinOps, + bool AllowPartials) { + // The pattern must end in an extract from index 0. + if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isNullConstant(Extract->getOperand(1))) + return SDValue(); + + // Match against one of the candidate binary ops. + SDValue Op = Extract->getOperand(0); + if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) { + return Op.getOpcode() == unsigned(BinOp); + })) + return SDValue(); + + // Floating-point reductions may require relaxed constraints on the final step + // of the reduction because they may reorder intermediate operations. + unsigned CandidateBinOp = Op.getOpcode(); + if (Op.getValueType().isFloatingPoint()) { + SDNodeFlags Flags = Op->getFlags(); + switch (CandidateBinOp) { + case ISD::FADD: + if (!Flags.hasNoSignedZeros() || !Flags.hasAllowReassociation()) + return SDValue(); + break; + default: + llvm_unreachable("Unhandled FP opcode for binop reduction"); + } + } + + // Matching failed - attempt to see if we did enough stages that a partial + // reduction from a subvector is possible. + auto PartialReduction = [&](SDValue Op, unsigned NumSubElts) { + if (!AllowPartials || !Op) + return SDValue(); + EVT OpVT = Op.getValueType(); + EVT OpSVT = OpVT.getScalarType(); + EVT SubVT = EVT::getVectorVT(*getContext(), OpSVT, NumSubElts); + if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0)) + return SDValue(); + BinOp = (ISD::NodeType)CandidateBinOp; + return getNode( + ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op, + getConstant(0, SDLoc(Op), TLI->getVectorIdxTy(getDataLayout()))); + }; + + // At each stage, we're looking for something that looks like: + // %s = shufflevector <8 x i32> %op, <8 x i32> undef, + // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, + // i32 undef, i32 undef, i32 undef, i32 undef> + // %a = binop <8 x i32> %op, %s + // Where the mask changes according to the stage. E.g. for a 3-stage pyramid, + // we expect something like: + // <4,5,6,7,u,u,u,u> + // <2,3,u,u,u,u,u,u> + // <1,u,u,u,u,u,u,u> + // While a partial reduction match would be: + // <2,3,u,u,u,u,u,u> + // <1,u,u,u,u,u,u,u> + unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); + SDValue PrevOp; + for (unsigned i = 0; i < Stages; ++i) { + unsigned MaskEnd = (1 << i); + + if (Op.getOpcode() != CandidateBinOp) + return PartialReduction(PrevOp, MaskEnd); + + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(Op0); + if (Shuffle) { + Op = Op1; + } else { + Shuffle = dyn_cast<ShuffleVectorSDNode>(Op1); + Op = Op0; + } + + // The first operand of the shuffle should be the same as the other operand + // of the binop. + if (!Shuffle || Shuffle->getOperand(0) != Op) + return PartialReduction(PrevOp, MaskEnd); + + // Verify the shuffle has the expected (at this stage of the pyramid) mask. 
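+    // e.g. (illustrative) at stage i == 1 of a v8i32 reduction, MaskEnd is 2
+    // and the expected shuffle mask is <2,3,u,u,u,u,u,u>: lane Index must be
+    // sourced from lane MaskEnd + Index of the previous stage's result.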
+ for (int Index = 0; Index < (int)MaskEnd; ++Index) + if (Shuffle->getMaskElt(Index) != (int)(MaskEnd + Index)) + return PartialReduction(PrevOp, MaskEnd); + + PrevOp = Op; + } + + BinOp = (ISD::NodeType)CandidateBinOp; + return Op; +} + +SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { + assert(N->getNumValues() == 1 && + "Can't unroll a vector with multiple results!"); + + EVT VT = N->getValueType(0); + unsigned NE = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + SDLoc dl(N); + + SmallVector<SDValue, 8> Scalars; + SmallVector<SDValue, 4> Operands(N->getNumOperands()); + + // If ResNE is 0, fully unroll the vector op. + if (ResNE == 0) + ResNE = NE; + else if (NE > ResNE) + NE = ResNE; + + unsigned i; + for (i= 0; i != NE; ++i) { + for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) { + SDValue Operand = N->getOperand(j); + EVT OperandVT = Operand.getValueType(); + if (OperandVT.isVector()) { + // A vector operand; extract a single element. + EVT OperandEltVT = OperandVT.getVectorElementType(); + Operands[j] = + getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, + getConstant(i, dl, TLI->getVectorIdxTy(getDataLayout()))); + } else { + // A scalar operand; just use it as is. + Operands[j] = Operand; + } + } + + switch (N->getOpcode()) { + default: { + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands, + N->getFlags())); + break; + } + case ISD::VSELECT: + Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands)); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], + getShiftAmountOperand(Operands[0].getValueType(), + Operands[1]))); + break; + case ISD::SIGN_EXTEND_INREG: { + EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType(); + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, + Operands[0], + getValueType(ExtVT))); + } + } + } + + for (; i < ResNE; ++i) + Scalars.push_back(getUNDEF(EltVT)); + + EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE); + return getBuildVector(VecVT, dl, Scalars); +} + +std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp( + SDNode *N, unsigned ResNE) { + unsigned Opcode = N->getOpcode(); + assert((Opcode == ISD::UADDO || Opcode == ISD::SADDO || + Opcode == ISD::USUBO || Opcode == ISD::SSUBO || + Opcode == ISD::UMULO || Opcode == ISD::SMULO) && + "Expected an overflow opcode"); + + EVT ResVT = N->getValueType(0); + EVT OvVT = N->getValueType(1); + EVT ResEltVT = ResVT.getVectorElementType(); + EVT OvEltVT = OvVT.getVectorElementType(); + SDLoc dl(N); + + // If ResNE is 0, fully unroll the vector op. 
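+  // e.g. (illustrative) unrolling (v4i32, v4i1) = uaddo %a, %b yields four
+  // scalar uaddo nodes; each overflow flag is then normalized to OvEltVT's
+  // boolean contents with the select emitted below.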
+ unsigned NE = ResVT.getVectorNumElements(); + if (ResNE == 0) + ResNE = NE; + else if (NE > ResNE) + NE = ResNE; + + SmallVector<SDValue, 8> LHSScalars; + SmallVector<SDValue, 8> RHSScalars; + ExtractVectorElements(N->getOperand(0), LHSScalars, 0, NE); + ExtractVectorElements(N->getOperand(1), RHSScalars, 0, NE); + + EVT SVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), ResEltVT); + SDVTList VTs = getVTList(ResEltVT, SVT); + SmallVector<SDValue, 8> ResScalars; + SmallVector<SDValue, 8> OvScalars; + for (unsigned i = 0; i < NE; ++i) { + SDValue Res = getNode(Opcode, dl, VTs, LHSScalars[i], RHSScalars[i]); + SDValue Ov = + getSelect(dl, OvEltVT, Res.getValue(1), + getBoolConstant(true, dl, OvEltVT, ResVT), + getConstant(0, dl, OvEltVT)); + + ResScalars.push_back(Res); + OvScalars.push_back(Ov); + } + + ResScalars.append(ResNE - NE, getUNDEF(ResEltVT)); + OvScalars.append(ResNE - NE, getUNDEF(OvEltVT)); + + EVT NewResVT = EVT::getVectorVT(*getContext(), ResEltVT, ResNE); + EVT NewOvVT = EVT::getVectorVT(*getContext(), OvEltVT, ResNE); + return std::make_pair(getBuildVector(NewResVT, dl, ResScalars), + getBuildVector(NewOvVT, dl, OvScalars)); +} + +bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, + LoadSDNode *Base, + unsigned Bytes, + int Dist) const { + if (LD->isVolatile() || Base->isVolatile()) + return false; + // TODO: probably too restrictive for atomics, revisit + if (!LD->isSimple()) + return false; + if (LD->isIndexed() || Base->isIndexed()) + return false; + if (LD->getChain() != Base->getChain()) + return false; + EVT VT = LD->getValueType(0); + if (VT.getSizeInBits() / 8 != Bytes) + return false; + + auto BaseLocDecomp = BaseIndexOffset::match(Base, *this); + auto LocDecomp = BaseIndexOffset::match(LD, *this); + + int64_t Offset = 0; + if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset)) + return (Dist * Bytes == Offset); + return false; +} + +/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if +/// it cannot be inferred. +unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { + // If this is a GlobalAddress + cst, return the alignment. + const GlobalValue *GV; + int64_t GVOffset = 0; + if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { + unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType()); + KnownBits Known(IdxWidth); + llvm::computeKnownBits(GV, Known, getDataLayout()); + unsigned AlignBits = Known.countMinTrailingZeros(); + unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; + if (Align) + return MinAlign(Align, GVOffset); + } + + // If this is a direct reference to a stack slot, use information about the + // stack slot's alignment. + int FrameIdx = INT_MIN; + int64_t FrameOffset = 0; + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { + FrameIdx = FI->getIndex(); + } else if (isBaseWithConstantOffset(Ptr) && + isa<FrameIndexSDNode>(Ptr.getOperand(0))) { + // Handle FI+Cst + FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); + FrameOffset = Ptr.getConstantOperandVal(1); + } + + if (FrameIdx != INT_MIN) { + const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); + unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), + FrameOffset); + return FIInfoAlign; + } + + return 0; +} + +/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type +/// which is split (or expanded) into two not necessarily identical pieces. 
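+/// For example (illustrative), v8i32 splits into (v4i32, v4i32), while an
+/// illegal scalar type such as i64 on a 32-bit target is transformed to
+/// (i32, i32).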
+std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const { + // Currently all types are split in half. + EVT LoVT, HiVT; + if (!VT.isVector()) + LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT); + else + LoVT = HiVT = VT.getHalfNumVectorElementsVT(*getContext()); + + return std::make_pair(LoVT, HiVT); +} + +/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the +/// low/high part. +std::pair<SDValue, SDValue> +SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, + const EVT &HiVT) { + assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <= + N.getValueType().getVectorNumElements() && + "More vector elements requested than available!"); + SDValue Lo, Hi; + Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, + getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()))); + Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, + getConstant(LoVT.getVectorNumElements(), DL, + TLI->getVectorIdxTy(getDataLayout()))); + return std::make_pair(Lo, Hi); +} + +/// Widen the vector up to the next power of two using INSERT_SUBVECTOR. +SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) { + EVT VT = N.getValueType(); + EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), + NextPowerOf2(VT.getVectorNumElements())); + return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N, + getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()))); +} + +void SelectionDAG::ExtractVectorElements(SDValue Op, + SmallVectorImpl<SDValue> &Args, + unsigned Start, unsigned Count) { + EVT VT = Op.getValueType(); + if (Count == 0) + Count = VT.getVectorNumElements(); + + EVT EltVT = VT.getVectorElementType(); + EVT IdxTy = TLI->getVectorIdxTy(getDataLayout()); + SDLoc SL(Op); + for (unsigned i = Start, e = Start + Count; i != e; ++i) { + Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, + Op, getConstant(i, SL, IdxTy))); + } +} + +// getAddressSpace - Return the address space this GlobalAddress belongs to. +unsigned GlobalAddressSDNode::getAddressSpace() const { + return getGlobal()->getType()->getAddressSpace(); +} + +Type *ConstantPoolSDNode::getType() const { + if (isMachineConstantPoolEntry()) + return Val.MachineCPVal->getType(); + return Val.ConstVal->getType(); +} + +bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, + unsigned &SplatBitSize, + bool &HasAnyUndefs, + unsigned MinSplatBits, + bool IsBigEndian) const { + EVT VT = getValueType(0); + assert(VT.isVector() && "Expected a vector type"); + unsigned VecWidth = VT.getSizeInBits(); + if (MinSplatBits > VecWidth) + return false; + + // FIXME: The widths are based on this node's type, but build vectors can + // truncate their operands. + SplatValue = APInt(VecWidth, 0); + SplatUndef = APInt(VecWidth, 0); + + // Get the bits. Bits with undefined values (when the corresponding element + // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared + // in SplatValue. If any of the values are not constant, give up and return + // false. + unsigned int NumOps = getNumOperands(); + assert(NumOps > 0 && "isConstantSplat has 0-size build vector"); + unsigned EltWidth = VT.getScalarSizeInBits(); + + for (unsigned j = 0; j < NumOps; ++j) { + unsigned i = IsBigEndian ? 
NumOps - 1 - j : j; + SDValue OpVal = getOperand(i); + unsigned BitPos = j * EltWidth; + + if (OpVal.isUndef()) + SplatUndef.setBits(BitPos, BitPos + EltWidth); + else if (auto *CN = dyn_cast<ConstantSDNode>(OpVal)) + SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos); + else if (auto *CN = dyn_cast<ConstantFPSDNode>(OpVal)) + SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos); + else + return false; + } + + // The build_vector is all constants or undefs. Find the smallest element + // size that splats the vector. + HasAnyUndefs = (SplatUndef != 0); + + // FIXME: This does not work for vectors with elements less than 8 bits. + while (VecWidth > 8) { + unsigned HalfSize = VecWidth / 2; + APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize); + APInt LowValue = SplatValue.trunc(HalfSize); + APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize); + APInt LowUndef = SplatUndef.trunc(HalfSize); + + // If the two halves do not match (ignoring undef bits), stop here. + if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) || + MinSplatBits > HalfSize) + break; + + SplatValue = HighValue | LowValue; + SplatUndef = HighUndef & LowUndef; + + VecWidth = HalfSize; + } + + SplatBitSize = VecWidth; + return true; +} + +SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts, + BitVector *UndefElements) const { + if (UndefElements) { + UndefElements->clear(); + UndefElements->resize(getNumOperands()); + } + assert(getNumOperands() == DemandedElts.getBitWidth() && + "Unexpected vector size"); + if (!DemandedElts) + return SDValue(); + SDValue Splatted; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (!DemandedElts[i]) + continue; + SDValue Op = getOperand(i); + if (Op.isUndef()) { + if (UndefElements) + (*UndefElements)[i] = true; + } else if (!Splatted) { + Splatted = Op; + } else if (Splatted != Op) { + return SDValue(); + } + } + + if (!Splatted) { + unsigned FirstDemandedIdx = DemandedElts.countTrailingZeros(); + assert(getOperand(FirstDemandedIdx).isUndef() && + "Can only have a splat without a constant for all undefs."); + return getOperand(FirstDemandedIdx); + } + + return Splatted; +} + +SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { + APInt DemandedElts = APInt::getAllOnesValue(getNumOperands()); + return getSplatValue(DemandedElts, UndefElements); +} + +ConstantSDNode * +BuildVectorSDNode::getConstantSplatNode(const APInt &DemandedElts, + BitVector *UndefElements) const { + return dyn_cast_or_null<ConstantSDNode>( + getSplatValue(DemandedElts, UndefElements)); +} + +ConstantSDNode * +BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const { + return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements)); +} + +ConstantFPSDNode * +BuildVectorSDNode::getConstantFPSplatNode(const APInt &DemandedElts, + BitVector *UndefElements) const { + return dyn_cast_or_null<ConstantFPSDNode>( + getSplatValue(DemandedElts, UndefElements)); +} + +ConstantFPSDNode * +BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { + return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements)); +} + +int32_t +BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, + uint32_t BitWidth) const { + if (ConstantFPSDNode *CN = + dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) { + bool IsExact; + APSInt IntVal(BitWidth); + const APFloat &APF = CN->getValueAPF(); + if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) != + 
APFloat::opOK || + !IsExact) + return -1; + + return IntVal.exactLogBase2(); + } + return -1; +} + +bool BuildVectorSDNode::isConstant() const { + for (const SDValue &Op : op_values()) { + unsigned Opc = Op.getOpcode(); + if (Opc != ISD::UNDEF && Opc != ISD::Constant && Opc != ISD::ConstantFP) + return false; + } + return true; +} + +bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { + // Find the first non-undef value in the shuffle mask. + unsigned i, e; + for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i) + /* search */; + + // If all elements are undefined, this shuffle can be considered a splat + // (although it should eventually get simplified away completely). + if (i == e) + return true; + + // Make sure all remaining elements are either undef or the same as the first + // non-undef value. + for (int Idx = Mask[i]; i != e; ++i) + if (Mask[i] >= 0 && Mask[i] != Idx) + return false; + return true; +} + +// Returns the SDNode if it is a constant integer BuildVector +// or constant integer. +SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) { + if (isa<ConstantSDNode>(N)) + return N.getNode(); + if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) + return N.getNode(); + // Treat a GlobalAddress supporting constant offset folding as a + // constant integer. + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N)) + if (GA->getOpcode() == ISD::GlobalAddress && + TLI->isOffsetFoldingLegal(GA)) + return GA; + return nullptr; +} + +SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) { + if (isa<ConstantFPSDNode>(N)) + return N.getNode(); + + if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) + return N.getNode(); + + return nullptr; +} + +void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { + assert(!Node->OperandList && "Node already has operands"); + assert(SDNode::getMaxNumOperands() >= Vals.size() && + "too many operands to fit into SDNode"); + SDUse *Ops = OperandRecycler.allocate( + ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); + + bool IsDivergent = false; + for (unsigned I = 0; I != Vals.size(); ++I) { + Ops[I].setUser(Node); + Ops[I].setInitial(Vals[I]); + if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence. + IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent(); + } + Node->NumOperands = Vals.size(); + Node->OperandList = Ops; + IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA); + if (!TLI->isSDNodeAlwaysUniform(Node)) + Node->SDNodeBits.IsDivergent = IsDivergent; + checkForCycles(Node); +} + +SDValue SelectionDAG::getTokenFactor(const SDLoc &DL, + SmallVectorImpl<SDValue> &Vals) { + size_t Limit = SDNode::getMaxNumOperands(); + while (Vals.size() > Limit) { + unsigned SliceIdx = Vals.size() - Limit; + auto ExtractedTFs = ArrayRef<SDValue>(Vals).slice(SliceIdx, Limit); + SDValue NewTF = getNode(ISD::TokenFactor, DL, MVT::Other, ExtractedTFs); + Vals.erase(Vals.begin() + SliceIdx, Vals.end()); + Vals.emplace_back(NewTF); + } + return getNode(ISD::TokenFactor, DL, MVT::Other, Vals); +} + +#ifndef NDEBUG +static void checkForCyclesHelper(const SDNode *N, + SmallPtrSetImpl<const SDNode*> &Visited, + SmallPtrSetImpl<const SDNode*> &Checked, + const llvm::SelectionDAG *DAG) { + // If this node has already been checked, don't check it again. + if (Checked.count(N)) + return; + + // If a node has already been visited on this depth-first walk, reject it as + // a cycle. 
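+  // e.g. (illustrative) for the malformed graph A -> B -> C -> A, the walk
+  // re-encounters A while A is still in Visited for the current path, so the
+  // insertion below fails and the cycle is reported.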
+  if (!Visited.insert(N).second) {
+    errs() << "Detected cycle in SelectionDAG\n";
+    dbgs() << "Offending node:\n";
+    N->dumprFull(DAG); dbgs() << "\n";
+    abort();
+  }
+
+  for (const SDValue &Op : N->op_values())
+    checkForCyclesHelper(Op.getNode(), Visited, Checked, DAG);
+
+  Checked.insert(N);
+  Visited.erase(N);
+}
+#endif
+
+void llvm::checkForCycles(const llvm::SDNode *N,
+                          const llvm::SelectionDAG *DAG,
+                          bool force) {
+#ifndef NDEBUG
+  bool check = force;
+#ifdef EXPENSIVE_CHECKS
+  check = true;
+#endif // EXPENSIVE_CHECKS
+  if (check) {
+    assert(N && "Checking nonexistent SDNode");
+    SmallPtrSet<const SDNode*, 32> visited;
+    SmallPtrSet<const SDNode*, 32> checked;
+    checkForCyclesHelper(N, visited, checked, DAG);
+  }
+#endif // !NDEBUG
+}
+
+void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) {
+  checkForCycles(DAG->getRoot().getNode(), DAG, force);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
new file mode 100644
index 0000000000000..3a53ab9717a45
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -0,0 +1,298 @@
+//==- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp - DAG Address Analysis --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include <cstdint>
+
+using namespace llvm;
+
+bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
+                                     const SelectionDAG &DAG,
+                                     int64_t &Off) const {
+  // Conservatively fail if a match failed.
+  if (!Base.getNode() || !Other.Base.getNode())
+    return false;
+  if (!hasValidOffset() || !Other.hasValidOffset())
+    return false;
+  // Initial Offset difference.
+  Off = *Other.Offset - *Offset;
+
+  if ((Other.Index == Index) && (Other.IsIndexSignExt == IsIndexSignExt)) {
+    // Trivial match.
+    if (Other.Base == Base)
+      return true;
+
+    // Match GlobalAddresses
+    if (auto *A = dyn_cast<GlobalAddressSDNode>(Base))
+      if (auto *B = dyn_cast<GlobalAddressSDNode>(Other.Base))
+        if (A->getGlobal() == B->getGlobal()) {
+          Off += B->getOffset() - A->getOffset();
+          return true;
+        }
+
+    // Match Constants
+    if (auto *A = dyn_cast<ConstantPoolSDNode>(Base))
+      if (auto *B = dyn_cast<ConstantPoolSDNode>(Other.Base)) {
+        bool IsMatch =
+            A->isMachineConstantPoolEntry() == B->isMachineConstantPoolEntry();
+        if (IsMatch) {
+          if (A->isMachineConstantPoolEntry())
+            IsMatch = A->getMachineCPVal() == B->getMachineCPVal();
+          else
+            IsMatch = A->getConstVal() == B->getConstVal();
+        }
+        if (IsMatch) {
+          Off += B->getOffset() - A->getOffset();
+          return true;
+        }
+      }
+
+    const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+
+    // Match FrameIndexes.
+    if (auto *A = dyn_cast<FrameIndexSDNode>(Base))
+      if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base)) {
+        // Equal FrameIndexes - offsets are directly comparable.
+        if (A->getIndex() == B->getIndex())
+          return true;
+        // Non-equal FrameIndexes - If both frame indices are fixed
+        // we know their relative offsets and can compare them. Otherwise
+        // we must be conservative.
+        if (MFI.isFixedObjectIndex(A->getIndex()) &&
+            MFI.isFixedObjectIndex(B->getIndex())) {
+          Off += MFI.getObjectOffset(B->getIndex()) -
+                 MFI.getObjectOffset(A->getIndex());
+          return true;
+        }
+      }
+  }
+  return false;
+}
+
+bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
+                                      const Optional<int64_t> NumBytes0,
+                                      const SDNode *Op1,
+                                      const Optional<int64_t> NumBytes1,
+                                      const SelectionDAG &DAG, bool &IsAlias) {
+  BaseIndexOffset BasePtr0 = match(Op0, DAG);
+  BaseIndexOffset BasePtr1 = match(Op1, DAG);
+
+  if (!(BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()))
+    return false;
+  int64_t PtrDiff;
+  if (NumBytes0.hasValue() && NumBytes1.hasValue() &&
+      BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
+    // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
+    // following situations arise:
+    IsAlias = !(
+        // [----BasePtr0----]
+        //                         [---BasePtr1--]
+        // ========PtrDiff========>
+        (*NumBytes0 <= PtrDiff) ||
+        //                     [----BasePtr0----]
+        // [---BasePtr1--]
+        // =====(-PtrDiff)====>
+        (PtrDiff + *NumBytes1 <= 0)); // i.e. *NumBytes1 < -PtrDiff.
+    return true;
+  }
+  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+  // able to calculate their relative offset if at least one arises
+  // from an alloca. However, distinct allocas cannot overlap and we
+  // can infer there is no alias.
+  if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
+    if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
+      MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+      // If the bases are the same frame index but we couldn't find a
+      // constant offset (the indices are different), be conservative.
+      if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+                     !MFI.isFixedObjectIndex(B->getIndex()))) {
+        IsAlias = false;
+        return true;
+      }
+    }
+
+  bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
+  bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
+  bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
+  bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
+  bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
+  bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
+
+  // If the bases are of mismatched kinds, or the indices are comparable, we
+  // can check that the accesses do not alias.
+  if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
+       (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
+      (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {
+    IsAlias = false;
+    return true;
+  }
+  return false; // Cannot determine whether the pointers alias.
+}
+
+bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize,
+                               const BaseIndexOffset &Other,
+                               int64_t OtherBitSize, int64_t &BitOffset) const {
+  int64_t Offset;
+  if (!equalBaseIndex(Other, DAG, Offset))
+    return false;
+  if (Offset >= 0) {
+    // Other is after *this:
+    // [-------*this---------]
+    //            [---Other--]
+    // ==Offset==>
+    BitOffset = 8 * Offset;
+    return BitOffset + OtherBitSize <= BitSize;
+  }
+  // Other starts strictly before *this, it cannot be fully contained.
+  //    [-------*this---------]
+  // [--Other--]
+  return false;
+}
+
+/// Parse the pointer expression in Ptr into base, index and offset
+/// components.
+static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
+                                   const SelectionDAG &DAG) {
+  SDValue Ptr = N->getBasePtr();
+
+  // (((B + I*M) + c)) + c ...
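+  // e.g. (illustrative) for  (add (add %B, %I), 16)  the loop below folds
+  // the constant into Offset and the trailing ADD handling produces
+  // Base = %B, Index = %I, Offset = 16.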
+  SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr);
+  SDValue Index = SDValue();
+  int64_t Offset = 0;
+  bool IsIndexSignExt = false;
+
+  // Pre-inc/pre-dec offsets are components of the effective address.
+  if (N->getAddressingMode() == ISD::PRE_INC) {
+    if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
+      Offset += C->getSExtValue();
+    else // If unknown, give up now.
+      return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+  } else if (N->getAddressingMode() == ISD::PRE_DEC) {
+    if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
+      Offset -= C->getSExtValue();
+    else // If unknown, give up now.
+      return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+  }
+
+  // Consume constant adds & ors with appropriate masking.
+  while (true) {
+    switch (Base->getOpcode()) {
+    case ISD::OR:
+      // Only consider ORs which act as adds.
+      if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1)))
+        if (DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) {
+          Offset += C->getSExtValue();
+          Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0));
+          continue;
+        }
+      break;
+    case ISD::ADD:
+      if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
+        Offset += C->getSExtValue();
+        Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0));
+        continue;
+      }
+      break;
+    case ISD::LOAD:
+    case ISD::STORE: {
+      auto *LSBase = cast<LSBaseSDNode>(Base.getNode());
+      unsigned int IndexResNo = (Base->getOpcode() == ISD::LOAD) ? 1 : 0;
+      if (LSBase->isIndexed() && Base.getResNo() == IndexResNo)
+        if (auto *C = dyn_cast<ConstantSDNode>(LSBase->getOffset())) {
+          auto Off = C->getSExtValue();
+          if (LSBase->getAddressingMode() == ISD::PRE_DEC ||
+              LSBase->getAddressingMode() == ISD::POST_DEC)
+            Offset -= Off;
+          else
+            Offset += Off;
+          Base = DAG.getTargetLoweringInfo().unwrapAddress(LSBase->getBasePtr());
+          continue;
+        }
+      break;
+    }
+    }
+    // If we get here, break out of the loop.
+    break;
+  }
+
+  if (Base->getOpcode() == ISD::ADD) {
+    // TODO: The following code appears to be needless as it just
+    //       bails on some Ptrs early, reducing the cases where we
+    //       find equivalence. We should be able to remove this.
+    // Inside a loop the current BASE pointer is calculated using an ADD and a
+    // MUL instruction. In this case Base is the actual BASE pointer.
+    // (i64 add (i64 %array_ptr)
+    //          (i64 mul (i64 %induction_var)
+    //                   (i64 %element_size)))
+    if (Base->getOperand(1)->getOpcode() == ISD::MUL)
+      return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
+
+    // Look at Base + Index + Offset cases.
+    Index = Base->getOperand(1);
+    SDValue PotentialBase = Base->getOperand(0);
+
+    // Skip sign extensions.
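+  // e.g. (illustrative) Index = (sign_extend i32 %i to i64) is unwrapped to
+  // %i here, and IsIndexSignExt records the extension so equalBaseIndex()
+  // can still match two addresses that extend the same index.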
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } + + // Check if Index Offset pattern + if (Index->getOpcode() != ISD::ADD || + !isa<ConstantSDNode>(Index->getOperand(1))) + return BaseIndexOffset(PotentialBase, Index, Offset, IsIndexSignExt); + + Offset += cast<ConstantSDNode>(Index->getOperand(1))->getSExtValue(); + Index = Index->getOperand(0); + if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } else + IsIndexSignExt = false; + Base = PotentialBase; + } + return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt); +} + +BaseIndexOffset BaseIndexOffset::match(const SDNode *N, + const SelectionDAG &DAG) { + if (const auto *LS0 = dyn_cast<LSBaseSDNode>(N)) + return matchLSNode(LS0, DAG); + if (const auto *LN = dyn_cast<LifetimeSDNode>(N)) { + if (LN->hasOffset()) + return BaseIndexOffset(LN->getOperand(1), SDValue(), LN->getOffset(), + false); + return BaseIndexOffset(LN->getOperand(1), SDValue(), false); + } + return BaseIndexOffset(); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + +LLVM_DUMP_METHOD void BaseIndexOffset::dump() const { + print(dbgs()); +} + +void BaseIndexOffset::print(raw_ostream& OS) const { + OS << "BaseIndexOffset base=["; + Base->print(OS); + OS << "] index=["; + if (Index) + Index->print(OS); + OS << "] offset=" << Offset; +} + +#endif diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp new file mode 100644 index 0000000000000..8c15563fcd23d --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -0,0 +1,10559 @@ +//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This implements routines for translating from LLVM IR into SelectionDAG IR. 
+// +//===----------------------------------------------------------------------===// + +#include "SelectionDAGBuilder.h" +#include "SDNodeDbgValue.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/SwiftErrorValueTracking.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" 
+#include "llvm/Support/MachineValueType.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Utils/Local.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <limits> +#include <numeric> +#include <tuple> +#include <utility> +#include <vector> + +using namespace llvm; +using namespace PatternMatch; +using namespace SwitchCG; + +#define DEBUG_TYPE "isel" + +/// LimitFloatPrecision - Generate low-precision inline sequences for +/// some float libcalls (6, 8 or 12 bits). +static unsigned LimitFloatPrecision; + +static cl::opt<unsigned, true> + LimitFPPrecision("limit-float-precision", + cl::desc("Generate low-precision inline sequences " + "for some float libcalls"), + cl::location(LimitFloatPrecision), cl::Hidden, + cl::init(0)); + +static cl::opt<unsigned> SwitchPeelThreshold( + "switch-peel-threshold", cl::Hidden, cl::init(66), + cl::desc("Set the case probability threshold for peeling the case from a " + "switch statement. A value greater than 100 will void this " + "optimization")); + +// Limit the width of DAG chains. This is important in general to prevent +// DAG-based analysis from blowing up. For example, alias analysis and +// load clustering may not complete in reasonable time. It is difficult to +// recognize and avoid this situation within each individual analysis, and +// future analyses are likely to have the same behavior. Limiting DAG width is +// the safe approach and will be especially important with global DAGs. +// +// MaxParallelChains default is arbitrarily high to avoid affecting +// optimization, but could be lowered to improve compile time. Any ld-ld-st-st +// sequence over this should have been converted to llvm.memcpy by the +// frontend. It is easy to induce this behavior with .ll code such as: +// %buffer = alloca [4096 x i8] +// %data = load [4096 x i8]* %argPtr +// store [4096 x i8] %data, [4096 x i8]* %buffer +static const unsigned MaxParallelChains = 64; + +// Return the calling convention if the Value passed requires ABI mangling as it +// is a parameter to a function or a return value from a function which is not +// an intrinsic. +static Optional<CallingConv::ID> getABIRegCopyCC(const Value *V) { + if (auto *R = dyn_cast<ReturnInst>(V)) + return R->getParent()->getParent()->getCallingConv(); + + if (auto *CI = dyn_cast<CallInst>(V)) { + const bool IsInlineAsm = CI->isInlineAsm(); + const bool IsIndirectFunctionCall = + !IsInlineAsm && !CI->getCalledFunction(); + + // It is possible that the call instruction is an inline asm statement or an + // indirect function call in which case the return value of + // getCalledFunction() would be nullptr. + const bool IsInstrinsicCall = + !IsInlineAsm && !IsIndirectFunctionCall && + CI->getCalledFunction()->getIntrinsicID() != Intrinsic::not_intrinsic; + + if (!IsInlineAsm && !IsInstrinsicCall) + return CI->getCallingConv(); + } + + return None; +} + +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, + const SDValue *Parts, unsigned NumParts, + MVT PartVT, EVT ValueVT, const Value *V, + Optional<CallingConv::ID> CC); + +/// getCopyFromParts - Create a value that contains the specified legal parts +/// combined into the value they represent. 
If the parts combine to a type +/// larger than ValueVT then AssertOp can be used to specify whether the extra +/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT +/// (ISD::AssertSext). +static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, + const SDValue *Parts, unsigned NumParts, + MVT PartVT, EVT ValueVT, const Value *V, + Optional<CallingConv::ID> CC = None, + Optional<ISD::NodeType> AssertOp = None) { + if (ValueVT.isVector()) + return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V, + CC); + + assert(NumParts > 0 && "No parts to assemble!"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Val = Parts[0]; + + if (NumParts > 1) { + // Assemble the value from multiple parts. + if (ValueVT.isInteger()) { + unsigned PartBits = PartVT.getSizeInBits(); + unsigned ValueBits = ValueVT.getSizeInBits(); + + // Assemble the power of 2 part. + unsigned RoundParts = + (NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts; + unsigned RoundBits = PartBits * RoundParts; + EVT RoundVT = RoundBits == ValueBits ? + ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits); + SDValue Lo, Hi; + + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); + + if (RoundParts > 2) { + Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, + PartVT, HalfVT, V); + Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, + RoundParts / 2, PartVT, HalfVT, V); + } else { + Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); + Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); + } + + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); + + if (RoundParts < NumParts) { + // Assemble the trailing non-power-of-2 part. + unsigned OddParts = NumParts - RoundParts; + EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); + Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT, + OddVT, V, CC); + + // Combine the round and odd parts. + Lo = Val; + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); + Hi = + DAG.getNode(ISD::SHL, DL, TotalVT, Hi, + DAG.getConstant(Lo.getValueSizeInBits(), DL, + TLI.getPointerTy(DAG.getDataLayout()))); + Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); + Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); + } + } else if (PartVT.isFloatingPoint()) { + // FP split into multiple FP parts (for ppcf128) + assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 && + "Unexpected split"); + SDValue Lo, Hi; + Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); + Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); + if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) + std::swap(Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); + } else { + // FP split into integer parts (soft fp) + assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && + !PartVT.isVector() && "Unexpected split"); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC); + } + } + + // There is now one part, held in Val. Correct it to match ValueVT. + // PartEVT is the type of the register class that holds the value. + // ValueVT is the type of the inline asm operation. 
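+  // For example, an i1 value produced in an i8 register reaches this point
+  // with PartEVT == i8 and ValueVT == i1; the integer-to-integer path below
+  // then emits the (optionally asserted) TRUNCATE down to i1.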
+  EVT PartEVT = Val.getValueType();
+
+  if (PartEVT == ValueVT)
+    return Val;
+
+  if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
+      ValueVT.bitsLT(PartEVT)) {
+    // For an FP value in an integer part, we need to truncate to the right
+    // width first.
+    PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+    Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
+  }
+
+  // Handle types that have the same size.
+  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
+    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+  // Handle types with different sizes.
+  if (PartEVT.isInteger() && ValueVT.isInteger()) {
+    if (ValueVT.bitsLT(PartEVT)) {
+      // For a truncate, see if we have any information to
+      // indicate whether the truncated bits will always be
+      // zero or sign-extension.
+      if (AssertOp.hasValue())
+        Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
+                          DAG.getValueType(ValueVT));
+      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+    }
+    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
+  }
+
+  if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+    // FP_ROUND's are always exact here.
+    if (ValueVT.bitsLT(Val.getValueType()))
+      return DAG.getNode(
+          ISD::FP_ROUND, DL, ValueVT, Val,
+          DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
+
+    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
+  }
+
+  // Handle MMX to a narrower integer type by bitcasting MMX to integer and
+  // then truncating.
+  if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
+      ValueVT.bitsLT(PartEVT)) {
+    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
+    return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+  }
+
+  report_fatal_error("Unknown mismatch in getCopyFromParts!");
+}
+
+static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
+                                              const Twine &ErrMsg) {
+  const Instruction *I = dyn_cast_or_null<Instruction>(V);
+  if (!I)
+    return Ctx.emitError(ErrMsg);
+
+  const char *AsmError = ", possible invalid constraint for vector type";
+  if (const CallInst *CI = dyn_cast<CallInst>(I))
+    if (isa<InlineAsm>(CI->getCalledValue()))
+      return Ctx.emitError(I, ErrMsg + AsmError);
+
+  return Ctx.emitError(I, ErrMsg);
+}
+
+/// getCopyFromPartsVector - Create a value that contains the specified legal
+/// parts combined into the value they represent. If the parts combine to a
+/// type larger than ValueVT then AssertOp can be used to specify whether the
+/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
+/// ValueVT (ISD::AssertSext).
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
+                                      const SDValue *Parts, unsigned NumParts,
+                                      MVT PartVT, EVT ValueVT, const Value *V,
+                                      Optional<CallingConv::ID> CallConv) {
+  assert(ValueVT.isVector() && "Not a vector value");
+  assert(NumParts > 0 && "No parts to assemble!");
+  const bool IsABIRegCopy = CallConv.hasValue();
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDValue Val = Parts[0];
+
+  // Handle a multi-element vector.
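+  // For example, a v8i32 value passed in four v2i32 registers arrives here
+  // with IntermediateVT == v2i32 and NumIntermediates == 4; the intermediate
+  // operands are reassembled into a v8i32 with CONCAT_VECTORS below.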
+  if (NumParts > 1) {
+    EVT IntermediateVT;
+    MVT RegisterVT;
+    unsigned NumIntermediates;
+    unsigned NumRegs;
+
+    if (IsABIRegCopy) {
+      NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+          *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
+          NumIntermediates, RegisterVT);
+    } else {
+      NumRegs =
+          TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+                                     NumIntermediates, RegisterVT);
+    }
+
+    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+    NumParts = NumRegs; // Silence a compiler warning.
+    assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+    assert(RegisterVT.getSizeInBits() ==
+           Parts[0].getSimpleValueType().getSizeInBits() &&
+           "Part type sizes don't match!");
+
+    // Assemble the parts into intermediate operands.
+    SmallVector<SDValue, 8> Ops(NumIntermediates);
+    if (NumIntermediates == NumParts) {
+      // If the register was not expanded, truncate or copy the value,
+      // as appropriate.
+      for (unsigned i = 0; i != NumParts; ++i)
+        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
+                                  PartVT, IntermediateVT, V);
+    } else if (NumParts > 0) {
+      // If the intermediate type was expanded, build the intermediate
+      // operands from the parts.
+      assert(NumParts % NumIntermediates == 0 &&
+             "Must expand into a divisible number of parts!");
+      unsigned Factor = NumParts / NumIntermediates;
+      for (unsigned i = 0; i != NumIntermediates; ++i)
+        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
+                                  PartVT, IntermediateVT, V);
+    }
+
+    // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
+    // intermediate operands.
+    EVT BuiltVectorTy =
+        EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
+                         (IntermediateVT.isVector()
+                              ? IntermediateVT.getVectorNumElements() * NumParts
+                              : NumIntermediates));
+    Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
+                                                : ISD::BUILD_VECTOR,
+                      DL, BuiltVectorTy, Ops);
+  }
+
+  // There is now one part, held in Val. Correct it to match ValueVT.
+  EVT PartEVT = Val.getValueType();
+
+  if (PartEVT == ValueVT)
+    return Val;
+
+  if (PartEVT.isVector()) {
+    // If the element type of the source/dest vectors are the same, but the
+    // parts vector has more elements than the value vector, then we have a
+    // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
+    // elements we want.
+    if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+      assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
+             "Cannot narrow, it would be a lossy transformation");
+      return DAG.getNode(
+          ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+    }
+
+    // Vector/Vector bitcast.
+    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
+      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+    assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
+           "Cannot handle this kind of promotion");
+    // Promoted vector extract
+    return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
+
+  }
+
+  // Trivial bitcast if the types are the same size and the destination
+  // vector type is legal.
+  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
+      TLI.isTypeLegal(ValueVT))
+    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+  if (ValueVT.getVectorNumElements() != 1) {
+    // Certain ABIs require that vectors are passed as integers. For vectors
+    // that are the same size, this is an obvious bitcast.
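+    // For example, a v2i32 value returned in one i64 part is recovered with
+    // the BITCAST below; if the part is wider (say i128), it is first bitcast
+    // to a wider vector (v4i32 here) and the leading subvector is extracted.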
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) { + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) { + // Bitcast Val back the original type and extract the corresponding + // vector we want. + unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits(); + EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(), + ValueVT.getVectorElementType(), Elts); + Val = DAG.getBitcast(WiderVecType, Val); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + + diagnosePossiblyInvalidConstraint( + *DAG.getContext(), V, "non-trivial scalar-to-vector conversion"); + return DAG.getUNDEF(ValueVT); + } + + // Handle cases such as i8 -> <1 x i1> + EVT ValueSVT = ValueVT.getVectorElementType(); + if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) + Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT) + : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); + + return DAG.getBuildVector(ValueVT, DL, Val); +} + +static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, + SDValue Val, SDValue *Parts, unsigned NumParts, + MVT PartVT, const Value *V, + Optional<CallingConv::ID> CallConv); + +/// getCopyToParts - Create a series of nodes that contain the specified value +/// split into legal parts. If the parts contain more bits than Val, then, for +/// integers, ExtendKind can be used to specify how to generate the extra bits. +static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, + SDValue *Parts, unsigned NumParts, MVT PartVT, + const Value *V, + Optional<CallingConv::ID> CallConv = None, + ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { + EVT ValueVT = Val.getValueType(); + + // Handle the vector case separately. + if (ValueVT.isVector()) + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V, + CallConv); + + unsigned PartBits = PartVT.getSizeInBits(); + unsigned OrigNumParts = NumParts; + assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) && + "Copying to an illegal type!"); + + if (NumParts == 0) + return; + + assert(!ValueVT.isVector() && "Vector case handled elsewhere"); + EVT PartEVT = PartVT; + if (PartEVT == ValueVT) { + assert(NumParts == 1 && "No-op copy with multiple parts!"); + Parts[0] = Val; + return; + } + + if (NumParts * PartBits > ValueVT.getSizeInBits()) { + // If the parts cover more bits than the value has, promote the value. + if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + assert(NumParts == 1 && "Do not know what to promote to!"); + Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); + } else { + if (ValueVT.isFloatingPoint()) { + // FP values need to be bitcast, then extended if they are being put + // into a larger container. + ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + } + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && + "Unknown mismatch!"); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); + } + } else if (PartBits == ValueVT.getSizeInBits()) { + // Different types of the same size. 
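+    // e.g. a single f64 value copied to one i64 part.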
+    assert(NumParts == 1 && PartEVT != ValueVT);
+    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+  } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+    // If the parts cover fewer bits than the value has, truncate the value.
+    assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+           ValueVT.isInteger() &&
+           "Unknown mismatch!");
+    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+    if (PartVT == MVT::x86mmx)
+      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+  }
+
+  // The value may have changed - recompute ValueVT.
+  ValueVT = Val.getValueType();
+  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+         "Failed to tile the value with PartVT!");
+
+  if (NumParts == 1) {
+    if (PartEVT != ValueVT) {
+      diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
+                                        "scalar-to-vector conversion failed");
+      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+    }
+
+    Parts[0] = Val;
+    return;
+  }
+
+  // Expand the value into multiple parts.
+  if (NumParts & (NumParts - 1)) {
+    // The number of parts is not a power of 2. Split off and copy the tail.
+    assert(PartVT.isInteger() && ValueVT.isInteger() &&
+           "Do not know what to expand to!");
+    unsigned RoundParts = 1 << Log2_32(NumParts);
+    unsigned RoundBits = RoundParts * PartBits;
+    unsigned OddParts = NumParts - RoundParts;
+    SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
+      DAG.getShiftAmountConstant(RoundBits, ValueVT, DL, /*LegalTypes*/false));
+
+    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
+                   CallConv);
+
+    if (DAG.getDataLayout().isBigEndian())
+      // The odd parts were reversed by getCopyToParts - unreverse them.
+      std::reverse(Parts + RoundParts, Parts + NumParts);
+
+    NumParts = RoundParts;
+    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+  }
+
+  // The number of parts is a power of 2. Repeatedly bisect the value using
+  // EXTRACT_ELEMENT.
+  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
+                         EVT::getIntegerVT(*DAG.getContext(),
+                                           ValueVT.getSizeInBits()),
+                         Val);
+
+  for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+    for (unsigned i = 0; i < NumParts; i += StepSize) {
+      unsigned ThisBits = StepSize * PartBits / 2;
+      EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
+      SDValue &Part0 = Parts[i];
+      SDValue &Part1 = Parts[i+StepSize/2];
+
+      Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+                          ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
+      Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+                          ThisVT, Part0, DAG.getIntPtrConstant(0, DL));
+
+      if (ThisBits == PartBits && ThisVT != PartVT) {
+        Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
+        Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
+      }
+    }
+  }
+
+  if (DAG.getDataLayout().isBigEndian())
+    std::reverse(Parts, Parts + OrigNumParts);
+}
+
+static SDValue widenVectorToPartType(SelectionDAG &DAG,
+                                     SDValue Val, const SDLoc &DL, EVT PartVT) {
+  if (!PartVT.isVector())
+    return SDValue();
+
+  EVT ValueVT = Val.getValueType();
+  unsigned PartNumElts = PartVT.getVectorNumElements();
+  unsigned ValueNumElts = ValueVT.getVectorNumElements();
+  if (PartNumElts > ValueNumElts &&
+      PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+    EVT ElementVT = PartVT.getVectorElementType();
+    // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
+    // undef elements.
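+    // For example, widening a v2f32 value to a v4f32 part type appends two
+    // undef elements and rebuilds the result with BUILD_VECTOR below.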
+    SmallVector<SDValue, 16> Ops;
+    DAG.ExtractVectorElements(Val, Ops);
+    SDValue EltUndef = DAG.getUNDEF(ElementVT);
+    for (unsigned i = ValueNumElts, e = PartNumElts; i != e; ++i)
+      Ops.push_back(EltUndef);
+
+    // FIXME: Use CONCAT for 2x -> 4x.
+    return DAG.getBuildVector(PartVT, DL, Ops);
+  }
+
+  return SDValue();
+}
+
+/// getCopyToPartsVector - Create a series of nodes that contain the specified
+/// value split into legal parts.
+static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
+                                 SDValue Val, SDValue *Parts, unsigned NumParts,
+                                 MVT PartVT, const Value *V,
+                                 Optional<CallingConv::ID> CallConv) {
+  EVT ValueVT = Val.getValueType();
+  assert(ValueVT.isVector() && "Not a vector");
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const bool IsABIRegCopy = CallConv.hasValue();
+
+  if (NumParts == 1) {
+    EVT PartEVT = PartVT;
+    if (PartEVT == ValueVT) {
+      // Nothing to do.
+    } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+      // Bitconvert vector->vector case.
+      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+    } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
+      Val = Widened;
+    } else if (PartVT.isVector() &&
+               PartEVT.getVectorElementType().bitsGE(
+                   ValueVT.getVectorElementType()) &&
+               PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
+
+      // Promoted vector extract
+      Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+    } else {
+      if (ValueVT.getVectorNumElements() == 1) {
+        Val = DAG.getNode(
+            ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
+            DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+      } else {
+        assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
+               "lossy conversion of vector to scalar type");
+        EVT IntermediateType =
+            EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+        Val = DAG.getBitcast(IntermediateType, Val);
+        Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+      }
+    }
+
+    assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
+    Parts[0] = Val;
+    return;
+  }
+
+  // Handle a multi-element vector.
+  EVT IntermediateVT;
+  MVT RegisterVT;
+  unsigned NumIntermediates;
+  unsigned NumRegs;
+  if (IsABIRegCopy) {
+    NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+        *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
+        NumIntermediates, RegisterVT);
+  } else {
+    NumRegs =
+        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+                                   NumIntermediates, RegisterVT);
+  }
+
+  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+  NumParts = NumRegs; // Silence a compiler warning.
+  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+  unsigned IntermediateNumElts = IntermediateVT.isVector() ?
+    IntermediateVT.getVectorNumElements() : 1;
+
+  // Convert the vector to the appropriate type if necessary.
+  unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts;
+
+  EVT BuiltVectorTy = EVT::getVectorVT(
+      *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
+  MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+  if (ValueVT != BuiltVectorTy) {
+    if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
+      Val = Widened;
+
+    Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
+  }
+
+  // Split the vector into intermediate operands.
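+  // For example, a v8i32 value headed for four v2i32 registers is split into
+  // four v2i32 intermediates with EXTRACT_SUBVECTOR below; each intermediate
+  // is then copied out to its legal part(s).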
+  SmallVector<SDValue, 8> Ops(NumIntermediates);
+  for (unsigned i = 0; i != NumIntermediates; ++i) {
+    if (IntermediateVT.isVector()) {
+      Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
+                           DAG.getConstant(i * IntermediateNumElts, DL, IdxVT));
+    } else {
+      Ops[i] = DAG.getNode(
+          ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
+          DAG.getConstant(i, DL, IdxVT));
+    }
+  }
+
+  // Split the intermediate operands into legal parts.
+  if (NumParts == NumIntermediates) {
+    // If the register was not expanded, promote or copy the value,
+    // as appropriate.
+    for (unsigned i = 0; i != NumParts; ++i)
+      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv);
+  } else if (NumParts > 0) {
+    // If the intermediate type was expanded, split each value into
+    // legal parts.
+    assert(NumIntermediates != 0 && "division by zero");
+    assert(NumParts % NumIntermediates == 0 &&
+           "Must expand into a divisible number of parts!");
+    unsigned Factor = NumParts / NumIntermediates;
+    for (unsigned i = 0; i != NumIntermediates; ++i)
+      getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V,
+                     CallConv);
+  }
+}
+
+RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
+                           EVT valuevt, Optional<CallingConv::ID> CC)
+    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
+      RegCount(1, regs.size()), CallConv(CC) {}
+
+RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
+                           const DataLayout &DL, unsigned Reg, Type *Ty,
+                           Optional<CallingConv::ID> CC) {
+  ComputeValueVTs(TLI, DL, Ty, ValueVTs);
+
+  CallConv = CC;
+
+  for (EVT ValueVT : ValueVTs) {
+    unsigned NumRegs =
+        isABIMangled()
+            ? TLI.getNumRegistersForCallingConv(Context, CC.getValue(), ValueVT)
+            : TLI.getNumRegisters(Context, ValueVT);
+    MVT RegisterVT =
+        isABIMangled()
+            ? TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), ValueVT)
+            : TLI.getRegisterType(Context, ValueVT);
+    for (unsigned i = 0; i != NumRegs; ++i)
+      Regs.push_back(Reg + i);
+    RegVTs.push_back(RegisterVT);
+    RegCount.push_back(NumRegs);
+    Reg += NumRegs;
+  }
+}
+
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
+                                      FunctionLoweringInfo &FuncInfo,
+                                      const SDLoc &dl, SDValue &Chain,
+                                      SDValue *Flag, const Value *V) const {
+  // A Value with type {} or [0 x %t] needs no registers.
+  if (ValueVTs.empty())
+    return SDValue();
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Assemble the legal parts into the final values.
+  SmallVector<SDValue, 4> Values(ValueVTs.size());
+  SmallVector<SDValue, 8> Parts;
+  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    // Copy the legal parts from the registers.
+    EVT ValueVT = ValueVTs[Value];
+    unsigned NumRegs = RegCount[Value];
+    MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
+                                          *DAG.getContext(),
+                                          CallConv.getValue(), RegVTs[Value])
+                                    : RegVTs[Value];
+
+    Parts.resize(NumRegs);
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      SDValue P;
+      if (!Flag) {
+        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+      } else {
+        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
+        *Flag = P.getValue(2);
+      }
+
+      Chain = P.getValue(1);
+      Parts[i] = P;
+
+      // If the source register was virtual and if we know something about it,
+      // add an assert node.
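+      // For example, if the live-out info proves that the top 24 bits of an
+      // i32 vreg are zero, an AssertZext from i8 is attached below so later
+      // DAG combines can rely on that fact.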
+      if (!Register::isVirtualRegister(Regs[Part + i]) ||
+          !RegisterVT.isInteger())
+        continue;
+
+      const FunctionLoweringInfo::LiveOutInfo *LOI =
+          FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
+      if (!LOI)
+        continue;
+
+      unsigned RegSize = RegisterVT.getScalarSizeInBits();
+      unsigned NumSignBits = LOI->NumSignBits;
+      unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
+
+      if (NumZeroBits == RegSize) {
+        // The current value is a zero.
+        // Explicitly express that as it would be easier for
+        // optimizations to kick in.
+        Parts[i] = DAG.getConstant(0, dl, RegisterVT);
+        continue;
+      }
+
+      // FIXME: We capture more information than the dag can represent. For
+      // now, just use the tightest assertzext/assertsext possible.
+      bool isSExt;
+      EVT FromVT(MVT::Other);
+      if (NumZeroBits) {
+        FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits);
+        isSExt = false;
+      } else if (NumSignBits > 1) {
+        FromVT =
+            EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1);
+        isSExt = true;
+      } else {
+        continue;
+      }
+      // Add an assertion node.
+      assert(FromVT != MVT::Other);
+      Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+                             RegisterVT, P, DAG.getValueType(FromVT));
+    }
+
+    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
+                                     RegisterVT, ValueVT, V, CallConv);
+    Part += NumRegs;
+    Parts.clear();
+  }
+
+  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
+}
+
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
+                                 const SDLoc &dl, SDValue &Chain, SDValue *Flag,
+                                 const Value *V,
+                                 ISD::NodeType PreferredExtendType) const {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  ISD::NodeType ExtendKind = PreferredExtendType;
+
+  // Get the list of the value's legal parts.
+  unsigned NumRegs = Regs.size();
+  SmallVector<SDValue, 8> Parts(NumRegs);
+  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    unsigned NumParts = RegCount[Value];
+
+    MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
+                                          *DAG.getContext(),
+                                          CallConv.getValue(), RegVTs[Value])
+                                    : RegVTs[Value];
+
+    if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
+      ExtendKind = ISD::ZERO_EXTEND;
+
+    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part],
+                   NumParts, RegisterVT, V, CallConv, ExtendKind);
+    Part += NumParts;
+  }
+
+  // Copy the parts into the registers.
+  SmallVector<SDValue, 8> Chains(NumRegs);
+  for (unsigned i = 0; i != NumRegs; ++i) {
+    SDValue Part;
+    if (!Flag) {
+      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+    } else {
+      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+      *Flag = Part.getValue(1);
+    }
+
+    Chains[i] = Part.getValue(0);
+  }
+
+  if (NumRegs == 1 || Flag)
+    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
+    // flagged to it. That is, the CopyToReg nodes and the user are considered
+    // a single scheduling unit. If we create a TokenFactor and return it as
+    // chain, then the TokenFactor is both a predecessor (operand) of the
+    // user as well as a successor (the TF operands are flagged to the user).
+    // c1, f1 = CopyToReg
+    // c2, f2 = CopyToReg
+    // c3     = TokenFactor c1, c2
+    // ...
+ // = op c3, ..., f2 + Chain = Chains[NumRegs-1]; + else + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); +} + +void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, + unsigned MatchingIdx, const SDLoc &dl, + SelectionDAG &DAG, + std::vector<SDValue> &Ops) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); + if (HasMatching) + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); + else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) { + // Put the register class of the virtual registers in the flag word. That + // way, later passes can recompute register class constraints for inline + // assembly as well as normal instructions. + // Don't do this for tied operands that can use the regclass information + // from the def. + const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); + Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); + } + + SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); + Ops.push_back(Res); + + if (Code == InlineAsm::Kind_Clobber) { + // Clobbers should always have a 1:1 mapping with registers, and may + // reference registers that have illegal (e.g. vector) types. Hence, we + // shouldn't try to apply any sort of splitting logic to them. + assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() && + "No 1:1 mapping from clobbers to regs?"); + unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); + (void)SP; + for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) { + Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I])); + assert( + (Regs[I] != SP || + DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) && + "If we clobbered the stack pointer, MFI should know about it."); + } + return; + } + + for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); + MVT RegisterVT = RegVTs[Value]; + for (unsigned i = 0; i != NumRegs; ++i) { + assert(Reg < Regs.size() && "Mismatch in # registers expected"); + unsigned TheReg = Regs[Reg++]; + Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); + } + } +} + +SmallVector<std::pair<unsigned, unsigned>, 4> +RegsForValue::getRegsAndSizes() const { + SmallVector<std::pair<unsigned, unsigned>, 4> OutVec; + unsigned I = 0; + for (auto CountAndVT : zip_first(RegCount, RegVTs)) { + unsigned RegCount = std::get<0>(CountAndVT); + MVT RegisterVT = std::get<1>(CountAndVT); + unsigned RegisterSize = RegisterVT.getSizeInBits(); + for (unsigned E = I + RegCount; I != E; ++I) + OutVec.push_back(std::make_pair(Regs[I], RegisterSize)); + } + return OutVec; +} + +void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa, + const TargetLibraryInfo *li) { + AA = aa; + GFI = gfi; + LibInfo = li; + DL = &DAG.getDataLayout(); + Context = DAG.getContext(); + LPadToCallSiteMap.clear(); + SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout()); +} + +void SelectionDAGBuilder::clear() { + NodeMap.clear(); + UnusedArgNodeMap.clear(); + PendingLoads.clear(); + PendingExports.clear(); + CurInst = nullptr; + HasTailCall = false; + SDNodeOrder = LowestSDNodeOrder; + StatepointLowering.clear(); +} + +void SelectionDAGBuilder::clearDanglingDebugInfo() { + DanglingDebugInfoMap.clear(); +} + +SDValue SelectionDAGBuilder::getRoot() { + if (PendingLoads.empty()) + return DAG.getRoot(); + + if 
(PendingLoads.size() == 1) { + SDValue Root = PendingLoads[0]; + DAG.setRoot(Root); + PendingLoads.clear(); + return Root; + } + + // Otherwise, we have to make a token factor node. + SDValue Root = DAG.getTokenFactor(getCurSDLoc(), PendingLoads); + PendingLoads.clear(); + DAG.setRoot(Root); + return Root; +} + +SDValue SelectionDAGBuilder::getControlRoot() { + SDValue Root = DAG.getRoot(); + + if (PendingExports.empty()) + return Root; + + // Turn all of the CopyToReg chains into one factored node. + if (Root.getOpcode() != ISD::EntryToken) { + unsigned i = 0, e = PendingExports.size(); + for (; i != e; ++i) { + assert(PendingExports[i].getNode()->getNumOperands() > 1); + if (PendingExports[i].getNode()->getOperand(0) == Root) + break; // Don't add the root if we already indirectly depend on it. + } + + if (i == e) + PendingExports.push_back(Root); + } + + Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + PendingExports); + PendingExports.clear(); + DAG.setRoot(Root); + return Root; +} + +void SelectionDAGBuilder::visit(const Instruction &I) { + // Set up outgoing PHI node register values before emitting the terminator. + if (I.isTerminator()) { + HandlePHINodesInSuccessorBlocks(I.getParent()); + } + + // Increase the SDNodeOrder if dealing with a non-debug instruction. + if (!isa<DbgInfoIntrinsic>(I)) + ++SDNodeOrder; + + CurInst = &I; + + visit(I.getOpcode(), I); + + if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) { + // Propagate the fast-math-flags of this IR instruction to the DAG node that + // maps to this instruction. + // TODO: We could handle all flags (nsw, etc) here. + // TODO: If an IR instruction maps to >1 node, only the final node will have + // flags set. + if (SDNode *Node = getNodeForIRValue(&I)) { + SDNodeFlags IncomingFlags; + IncomingFlags.copyFMF(*FPMO); + if (!Node->getFlags().isDefined()) + Node->setFlags(IncomingFlags); + else + Node->intersectFlagsWith(IncomingFlags); + } + } + + if (!I.isTerminator() && !HasTailCall && + !isStatepoint(&I)) // statepoints handle their exports internally + CopyToExportRegsIfNeeded(&I); + + CurInst = nullptr; +} + +void SelectionDAGBuilder::visitPHI(const PHINode &) { + llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); +} + +void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { + // Note: this doesn't use InstVisitor, because it has to work with + // ConstantExpr's in addition to instructions. + switch (Opcode) { + default: llvm_unreachable("Unknown instruction type encountered!"); + // Build the switch statement using the Instruction.def file. +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; +#include "llvm/IR/Instruction.def" + } +} + +void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable, + const DIExpression *Expr) { + auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) { + const DbgValueInst *DI = DDI.getDI(); + DIVariable *DanglingVariable = DI->getVariable(); + DIExpression *DanglingExpr = DI->getExpression(); + if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) { + LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n"); + return true; + } + return false; + }; + + for (auto &DDIMI : DanglingDebugInfoMap) { + DanglingDebugInfoVector &DDIV = DDIMI.second; + + // If debug info is to be dropped, run it through final checks to see + // whether it can be salvaged. 
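+    // Overlapping fragments are salvaged below before being erased, so a
+    // dangling dbg.value that is about to be dropped still gets a location
+    // if one can be recovered from its defining instructions.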
+    for (auto &DDI : DDIV)
+      if (isMatchingDbgValue(DDI))
+        salvageUnresolvedDbgValue(DDI);
+
+    DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end());
+  }
+}
+
+// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+// generate the debug data structures now that we've seen its definition.
+void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
+                                                   SDValue Val) {
+  auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V);
+  if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
+    return;
+
+  DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
+  for (auto &DDI : DDIV) {
+    const DbgValueInst *DI = DDI.getDI();
+    assert(DI && "Ill-formed DanglingDebugInfo");
+    DebugLoc dl = DDI.getdl();
+    unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
+    unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
+    DILocalVariable *Variable = DI->getVariable();
+    DIExpression *Expr = DI->getExpression();
+    assert(Variable->isValidLocationForIntrinsic(dl) &&
+           "Expected inlined-at fields to agree");
+    SDDbgValue *SDV;
+    if (Val.getNode()) {
+      // FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
+      // FuncArgumentDbgValue (it would be hoisted to the function entry, and
+      // if we couldn't resolve it directly when examining the DbgValue
+      // intrinsic in the first place, we should not be more successful here).
+      // Unless we have some test case that proves this to be correct, we
+      // should avoid calling EmitFuncArgumentDbgValue here.
+      if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
+        LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
+                          << DbgSDNodeOrder << "] for:\n " << *DI << "\n");
+        LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
+        // Increase the SDNodeOrder for the DbgValue here to make sure it is
+        // inserted after the definition of Val when emitting the instructions
+        // after ISel. An alternative could be to teach
+        // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
+        LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
+                   << "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
+                   << ValSDNodeOrder << "\n");
+        SDV = getDbgValue(Val, Variable, Expr, dl,
+                          std::max(DbgSDNodeOrder, ValSDNodeOrder));
+        DAG.AddDbgValue(SDV, Val.getNode(), false);
+      } else
+        LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
+                          << " in EmitFuncArgumentDbgValue\n");
+    } else {
+      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+      auto Undef =
+          UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
+      auto SDV =
+          DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
+      DAG.AddDbgValue(SDV, nullptr, false);
+    }
+  }
+  DDIV.clear();
+}
+
+void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
+  Value *V = DDI.getDI()->getValue();
+  DILocalVariable *Var = DDI.getDI()->getVariable();
+  DIExpression *Expr = DDI.getDI()->getExpression();
+  DebugLoc DL = DDI.getdl();
+  DebugLoc InstDL = DDI.getDI()->getDebugLoc();
+  unsigned SDOrder = DDI.getSDNodeOrder();
+
+  // Currently we consider only dbg.value intrinsics -- we tell the salvager
+  // that DW_OP_stack_value is desired.
+  assert(isa<DbgValueInst>(DDI.getDI()));
+  bool StackValue = true;
+
+  // Can this Value be encoded without any further work?
+  if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder))
+    return;
+
+  // Attempt to salvage back through as many instructions as possible. Bail if
+  // a non-instruction is seen, such as a constant expression or global
+  // variable.
FIXME: Further work could recover those too. + while (isa<Instruction>(V)) { + Instruction &VAsInst = *cast<Instruction>(V); + DIExpression *NewExpr = salvageDebugInfoImpl(VAsInst, Expr, StackValue); + + // If we cannot salvage any further, and haven't yet found a suitable debug + // expression, bail out. + if (!NewExpr) + break; + + // New value and expr now represent this debuginfo. + V = VAsInst.getOperand(0); + Expr = NewExpr; + + // Some kind of simplification occurred: check whether the operand of the + // salvaged debug expression can be encoded in this DAG. + if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder)) { + LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n " + << DDI.getDI() << "\nBy stripping back to:\n " << V); + return; + } + } + + // This was the final opportunity to salvage this debug information, and it + // couldn't be done. Place an undef DBG_VALUE at this location to terminate + // any earlier variable location. + auto Undef = UndefValue::get(DDI.getDI()->getVariableLocation()->getType()); + auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); + + LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << DDI.getDI() + << "\n"); + LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0) + << "\n"); +} + +bool SelectionDAGBuilder::handleDebugValue(const Value *V, DILocalVariable *Var, + DIExpression *Expr, DebugLoc dl, + DebugLoc InstDL, unsigned Order) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDDbgValue *SDV; + if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) || + isa<ConstantPointerNull>(V)) { + SDV = DAG.getConstantDbgValue(Var, Expr, V, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); + return true; + } + + // If the Value is a frame index, we can create a FrameIndex debug value + // without relying on the DAG at all. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + auto SI = FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) { + auto SDV = + DAG.getFrameIndexDbgValue(Var, Expr, SI->second, + /*IsIndirect*/ false, dl, SDNodeOrder); + // Do not attach the SDNodeDbgValue to an SDNode: this variable location + // is still available even if the SDNode gets optimized out. + DAG.AddDbgValue(SDV, nullptr, false); + return true; + } + } + + // Do not use getValue() in here; we don't want to generate code at + // this point if it hasn't been done yet. + SDValue N = NodeMap[V]; + if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map. + N = UnusedArgNodeMap[V]; + if (N.getNode()) { + if (EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N)) + return true; + SDV = getDbgValue(N, Var, Expr, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, N.getNode(), false); + return true; + } + + // Special rules apply for the first dbg.values of parameter variables in a + // function. Identify them by the fact they reference Argument Values, that + // they're parameters, and they are parameters of the current function. We + // need to let them dangle until they get an SDNode. + bool IsParamOfFunc = isa<Argument>(V) && Var->isParameter() && + !InstDL.getInlinedAt(); + if (!IsParamOfFunc) { + // The value is not used in this block yet (or it would have an SDNode). + // We still want the value to appear for the user if possible -- if it has + // an associated VReg, we can refer to that instead. 
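+    // A value that spans several registers is described below by one
+    // DBG_VALUE per register, each carrying a DW_OP_LLVM_fragment expression
+    // for the bits that its register holds.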
+    auto VMI = FuncInfo.ValueMap.find(V);
+    if (VMI != FuncInfo.ValueMap.end()) {
+      unsigned Reg = VMI->second;
+      // If this is a PHI node, it may be split up into several MI PHI nodes
+      // (in FunctionLoweringInfo::set).
+      RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
+                       V->getType(), None);
+      if (RFV.occupiesMultipleRegs()) {
+        unsigned Offset = 0;
+        unsigned BitsToDescribe = 0;
+        if (auto VarSize = Var->getSizeInBits())
+          BitsToDescribe = *VarSize;
+        if (auto Fragment = Expr->getFragmentInfo())
+          BitsToDescribe = Fragment->SizeInBits;
+        for (auto RegAndSize : RFV.getRegsAndSizes()) {
+          unsigned RegisterSize = RegAndSize.second;
+          // Bail out if all bits are described already.
+          if (Offset >= BitsToDescribe)
+            break;
+          unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
+                                      ? BitsToDescribe - Offset
+                                      : RegisterSize;
+          auto FragmentExpr = DIExpression::createFragmentExpression(
+              Expr, Offset, FragmentSize);
+          if (!FragmentExpr)
+            continue;
+          SDV = DAG.getVRegDbgValue(Var, *FragmentExpr, RegAndSize.first,
+                                    false, dl, SDNodeOrder);
+          DAG.AddDbgValue(SDV, nullptr, false);
+          Offset += RegisterSize;
+        }
+      } else {
+        SDV = DAG.getVRegDbgValue(Var, Expr, Reg, false, dl, SDNodeOrder);
+        DAG.AddDbgValue(SDV, nullptr, false);
+      }
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void SelectionDAGBuilder::resolveOrClearDbgInfo() {
+  // Try to fixup any remaining dangling debug info -- and drop it if we can't.
+  for (auto &Pair : DanglingDebugInfoMap)
+    for (auto &DDI : Pair.second)
+      salvageUnresolvedDbgValue(DDI);
+  clearDanglingDebugInfo();
+}
+
+/// getCopyFromRegs - If there was a virtual register allocated for the value
+/// V, emit CopyFromReg of the specified type Ty. Return an empty SDValue()
+/// otherwise.
+SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
+  DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
+  SDValue Result;
+
+  if (It != FuncInfo.ValueMap.end()) {
+    unsigned InReg = It->second;
+
+    RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+                     DAG.getDataLayout(), InReg, Ty,
+                     None); // This is not an ABI copy.
+    SDValue Chain = DAG.getEntryNode();
+    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
+                                 V);
+    resolveDanglingDebugInfo(V, Result);
+  }
+
+  return Result;
+}
+
+/// getValue - Return an SDValue for the given Value.
+SDValue SelectionDAGBuilder::getValue(const Value *V) {
+  // If we already have an SDValue for this value, use it. It's important
+  // to do this first, so that we don't create a CopyFromReg if we already
+  // have a regular SDValue.
+  SDValue &N = NodeMap[V];
+  if (N.getNode()) return N;
+
+  // If there's a virtual register allocated and initialized for this
+  // value, use it.
+  if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
+    return copyFromReg;
+
+  // Otherwise create a new SDValue and remember it.
+  SDValue Val = getValueImpl(V);
+  NodeMap[V] = Val;
+  resolveDanglingDebugInfo(V, Val);
+  return Val;
+}
+
+// Return true if an SDValue exists for the given Value.
+bool SelectionDAGBuilder::findValue(const Value *V) const {
+  return (NodeMap.find(V) != NodeMap.end()) ||
+         (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
+}
+
+/// getNonRegisterValue - Return an SDValue for the given Value, but
+/// don't look in FuncInfo.ValueMap for a virtual register.
+SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
+  // If we already have an SDValue for this value, use it.
+ SDValue &N = NodeMap[V]; + if (N.getNode()) { + if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) { + // Remove the debug location from the node as the node is about to be used + // in a location which may differ from the original debug location. This + // is relevant to Constant and ConstantFP nodes because they can appear + // as constant expressions inside PHI nodes. + N->setDebugLoc(DebugLoc()); + } + return N; + } + + // Otherwise create a new SDValue and remember it. + SDValue Val = getValueImpl(V); + NodeMap[V] = Val; + resolveDanglingDebugInfo(V, Val); + return Val; +} + +/// getValueImpl - Helper function for getValue and getNonRegisterValue. +/// Create an SDValue for the given value. +SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + if (const Constant *C = dyn_cast<Constant>(V)) { + EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true); + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) + return DAG.getConstant(*CI, getCurSDLoc(), VT); + + if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) + return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); + + if (isa<ConstantPointerNull>(C)) { + unsigned AS = V->getType()->getPointerAddressSpace(); + return DAG.getConstant(0, getCurSDLoc(), + TLI.getPointerTy(DAG.getDataLayout(), AS)); + } + + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) + return DAG.getConstantFP(*CFP, getCurSDLoc(), VT); + + if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) + return DAG.getUNDEF(VT); + + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + visit(CE->getOpcode(), *CE); + SDValue N1 = NodeMap[V]; + assert(N1.getNode() && "visit didn't populate the NodeMap!"); + return N1; + } + + if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) { + SmallVector<SDValue, 4> Constants; + for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); + OI != OE; ++OI) { + SDNode *Val = getValue(*OI).getNode(); + // If the operand is an empty aggregate, there are no values. + if (!Val) continue; + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. + for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Constants.push_back(SDValue(Val, i)); + } + + return DAG.getMergeValues(Constants, getCurSDLoc()); + } + + if (const ConstantDataSequential *CDS = + dyn_cast<ConstantDataSequential>(C)) { + SmallVector<SDValue, 4> Ops; + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. 
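+        // For example, a <4 x i32> ConstantDataVector contributes four i32
+        // constant SDValues here; arrays become a merged value list, while
+        // vectors are rebuilt with getBuildVector below.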
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Ops.push_back(SDValue(Val, i)); + } + + if (isa<ArrayType>(CDS->getType())) + return DAG.getMergeValues(Ops, getCurSDLoc()); + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); + } + + if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { + assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && + "Unknown struct or array constant!"); + + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs); + unsigned NumElts = ValueVTs.size(); + if (NumElts == 0) + return SDValue(); // empty struct + SmallVector<SDValue, 4> Constants(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + EVT EltVT = ValueVTs[i]; + if (isa<UndefValue>(C)) + Constants[i] = DAG.getUNDEF(EltVT); + else if (EltVT.isFloatingPoint()) + Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT); + else + Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT); + } + + return DAG.getMergeValues(Constants, getCurSDLoc()); + } + + if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) + return DAG.getBlockAddress(BA, VT); + + VectorType *VecTy = cast<VectorType>(V->getType()); + unsigned NumElements = VecTy->getNumElements(); + + // Now that we know the number and type of the elements, get that number of + // elements into the Ops array based on what kind of constant it is. + SmallVector<SDValue, 16> Ops; + if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { + for (unsigned i = 0; i != NumElements; ++i) + Ops.push_back(getValue(CV->getOperand(i))); + } else { + assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); + EVT EltVT = + TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType()); + + SDValue Op; + if (EltVT.isFloatingPoint()) + Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT); + else + Op = DAG.getConstant(0, getCurSDLoc(), EltVT); + Ops.assign(NumElements, Op); + } + + // Create a BUILD_VECTOR node. + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); + } + + // If this is a static alloca, generate it as the frameindex instead of + // computation. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + return DAG.getFrameIndex(SI->second, + TLI.getFrameIndexTy(DAG.getDataLayout())); + } + + // If this is an instruction which fast-isel has deferred, select it now. + if (const Instruction *Inst = dyn_cast<Instruction>(V)) { + unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + + RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, + Inst->getType(), getABIRegCopyCC(V)); + SDValue Chain = DAG.getEntryNode(); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + } + + llvm_unreachable("Can't get register for value!"); +} + +void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { + auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; + bool IsCoreCLR = Pers == EHPersonality::CoreCLR; + bool IsSEH = isAsynchronousEHPersonality(Pers); + bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX; + MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; + if (!IsSEH) + CatchPadMBB->setIsEHScopeEntry(); + // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. 
+  if (IsMSVCCXX || IsCoreCLR)
+    CatchPadMBB->setIsEHFuncletEntry();
+  // Wasm does not need catchpads anymore
+  if (!IsWasmCXX)
+    DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other,
+                            getControlRoot()));
+}
+
+void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
+  // Update machine-CFG edge.
+  MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
+  FuncInfo.MBB->addSuccessor(TargetMBB);
+
+  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+  bool IsSEH = isAsynchronousEHPersonality(Pers);
+  if (IsSEH) {
+    // If this is not a fall-through branch or optimizations are switched off,
+    // emit the branch.
+    if (TargetMBB != NextBlock(FuncInfo.MBB) ||
+        TM.getOptLevel() == CodeGenOpt::None)
+      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+                              getControlRoot(), DAG.getBasicBlock(TargetMBB)));
+    return;
+  }
+
+  // Figure out the funclet membership for the catchret's successor.
+  // This will be used by the FuncletLayout pass to determine how to order the
+  // BB's.
+  // A 'catchret' returns to the outer scope's color.
+  Value *ParentPad = I.getCatchSwitchParentPad();
+  const BasicBlock *SuccessorColor;
+  if (isa<ConstantTokenNone>(ParentPad))
+    SuccessorColor = &FuncInfo.Fn->getEntryBlock();
+  else
+    SuccessorColor = cast<Instruction>(ParentPad)->getParent();
+  assert(SuccessorColor && "No parent funclet for catchret!");
+  MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
+  assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
+
+  // Create the terminator node.
+  SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
+                            getControlRoot(), DAG.getBasicBlock(TargetMBB),
+                            DAG.getBasicBlock(SuccessorColorMBB));
+  DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
+  // Don't emit any special code for the cleanuppad instruction. It just marks
+  // the start of an EH scope/funclet.
+  FuncInfo.MBB->setIsEHScopeEntry();
+  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+  if (Pers != EHPersonality::Wasm_CXX) {
+    FuncInfo.MBB->setIsEHFuncletEntry();
+    FuncInfo.MBB->setIsCleanupFuncletEntry();
+  }
+}
+
+// For wasm, there's always a single catch pad attached to a catchswitch, and
+// the control flow always stops at the single catch pad, as it does for a
+// cleanup pad. In case the exception caught is not of the types the catch pad
+// catches, it will be rethrown by a rethrow.
+static void findWasmUnwindDestinations(
+    FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
+    BranchProbability Prob,
+    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+        &UnwindDests) {
+  while (EHPadBB) {
+    const Instruction *Pad = EHPadBB->getFirstNonPHI();
+    if (isa<CleanupPadInst>(Pad)) {
+      // Stop on cleanup pads.
+      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+      UnwindDests.back().first->setIsEHScopeEntry();
+      break;
+    } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+      // Add the catchpad handlers to the possible destinations. We don't
+      // continue to the unwind destination of the catchswitch for wasm.
+      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+        UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
+        UnwindDests.back().first->setIsEHScopeEntry();
+      }
+      break;
+    } else {
+      continue;
+    }
+  }
+}
+
+/// When an invoke or a cleanupret unwinds to the next EH pad, there are
+/// many places it could ultimately go.
In the IR, we have a single unwind +/// destination, but in the machine CFG, we enumerate all the possible blocks. +/// This function skips over imaginary basic blocks that hold catchswitch +/// instructions, and finds all the "real" machine +/// basic block destinations. As those destinations may not be successors of +/// EHPadBB, here we also calculate the edge probability to those destinations. +/// The passed-in Prob is the edge probability to EHPadBB. +static void findUnwindDestinations( + FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB, + BranchProbability Prob, + SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>> + &UnwindDests) { + EHPersonality Personality = + classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; + bool IsCoreCLR = Personality == EHPersonality::CoreCLR; + bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX; + bool IsSEH = isAsynchronousEHPersonality(Personality); + + if (IsWasmCXX) { + findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests); + assert(UnwindDests.size() <= 1 && + "There should be at most one unwind destination for wasm"); + return; + } + + while (EHPadBB) { + const Instruction *Pad = EHPadBB->getFirstNonPHI(); + BasicBlock *NewEHPadBB = nullptr; + if (isa<LandingPadInst>(Pad)) { + // Stop on landingpads. They are not funclets. + UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); + break; + } else if (isa<CleanupPadInst>(Pad)) { + // Stop on cleanup pads. Cleanups are always funclet entries for all known + // personalities. + UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); + UnwindDests.back().first->setIsEHScopeEntry(); + UnwindDests.back().first->setIsEHFuncletEntry(); + break; + } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { + // Add the catchpad handlers to the possible destinations. + for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { + UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob); + // For MSVC++ and the CLR, catchblocks are funclets and need prologues. + if (IsMSVCCXX || IsCoreCLR) + UnwindDests.back().first->setIsEHFuncletEntry(); + if (!IsSEH) + UnwindDests.back().first->setIsEHScopeEntry(); + } + NewEHPadBB = CatchSwitch->getUnwindDest(); + } else { + continue; + } + + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (BPI && NewEHPadBB) + Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB); + EHPadBB = NewEHPadBB; + } +} + +void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) { + // Update successor info. + SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; + auto UnwindDest = I.getUnwindDest(); + BranchProbabilityInfo *BPI = FuncInfo.BPI; + BranchProbability UnwindDestProb = + (BPI && UnwindDest) + ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest) + : BranchProbability::getZero(); + findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests); + for (auto &UnwindDest : UnwindDests) { + UnwindDest.first->setIsEHPad(); + addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second); + } + FuncInfo.MBB->normalizeSuccProbs(); + + // Create the terminator node. 
+ SDValue Ret = + DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot()); + DAG.setRoot(Ret); +} + +void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) { + report_fatal_error("visitCatchSwitch not yet implemented!"); +} + +void SelectionDAGBuilder::visitRet(const ReturnInst &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + auto &DL = DAG.getDataLayout(); + SDValue Chain = getControlRoot(); + SmallVector<ISD::OutputArg, 8> Outs; + SmallVector<SDValue, 8> OutVals; + + // Calls to @llvm.experimental.deoptimize don't generate a return value, so + // lower + // + // %val = call <ty> @llvm.experimental.deoptimize() + // ret <ty> %val + // + // differently. + if (I.getParent()->getTerminatingDeoptimizeCall()) { + LowerDeoptimizingReturn(); + return; + } + + if (!FuncInfo.CanLowerReturn) { + unsigned DemoteReg = FuncInfo.DemoteRegister; + const Function *F = I.getParent()->getParent(); + + // Emit a store of the return value through the virtual register. + // Leave Outs empty so that LowerReturn won't try to load return + // registers the usual way. + SmallVector<EVT, 1> PtrValueVTs; + ComputeValueVTs(TLI, DL, + F->getReturnType()->getPointerTo( + DAG.getDataLayout().getAllocaAddrSpace()), + PtrValueVTs); + + SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + DemoteReg, PtrValueVTs[0]); + SDValue RetOp = getValue(I.getOperand(0)); + + SmallVector<EVT, 4> ValueVTs, MemVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs, + &Offsets); + unsigned NumValues = ValueVTs.size(); + + SmallVector<SDValue, 4> Chains(NumValues); + for (unsigned i = 0; i != NumValues; ++i) { + // An aggregate return value cannot wrap around the address space, so + // offsets to its parts don't wrap either. + SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]); + + SDValue Val = RetOp.getValue(RetOp.getResNo() + i); + if (MemVTs[i] != ValueVTs[i]) + Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]); + Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val, + // FIXME: better loc info would be nice. 
+ Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); + } + + Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), + MVT::Other, Chains); + } else if (I.getNumOperands() != 0) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues) { + SDValue RetOp = getValue(I.getOperand(0)); + + const Function *F = I.getParent()->getParent(); + + bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( + I.getOperand(0)->getType(), F->getCallingConv(), + /*IsVarArg*/ false); + + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, + Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, + Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + LLVMContext &Context = F->getContext(); + bool RetInReg = F->getAttributes().hasAttribute( + AttributeList::ReturnIndex, Attribute::InReg); + + for (unsigned j = 0; j != NumValues; ++j) { + EVT VT = ValueVTs[j]; + + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) + VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind); + + CallingConv::ID CC = F->getCallingConv(); + + unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT); + MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT); + SmallVector<SDValue, 4> Parts(NumParts); + getCopyToParts(DAG, getCurSDLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + j), + &Parts[0], NumParts, PartVT, &I, CC, ExtendKind); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (RetInReg) + Flags.setInReg(); + + if (I.getOperand(0)->getType()->isPointerTy()) { + Flags.setPointer(); + Flags.setPointerAddrSpace( + cast<PointerType>(I.getOperand(0)->getType())->getAddressSpace()); + } + + if (NeedsRegBlock) { + Flags.setInConsecutiveRegs(); + if (j == NumValues - 1) + Flags.setInConsecutiveRegsLast(); + } + + // Propagate extension type if any + if (ExtendKind == ISD::SIGN_EXTEND) + Flags.setSExt(); + else if (ExtendKind == ISD::ZERO_EXTEND) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) { + Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), + VT, /*isfixed=*/true, 0, 0)); + OutVals.push_back(Parts[i]); + } + } + } + } + + // Push in swifterror virtual register as the last element of Outs. This makes + // sure swifterror virtual register will be returned in the swifterror + // physical register. + const Function *F = I.getParent()->getParent(); + if (TLI.supportSwiftError() && + F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) { + assert(SwiftError.getFunctionArg() && "Need a swift error argument"); + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + Flags.setSwiftError(); + Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/, + EVT(TLI.getPointerTy(DL)) /*argvt*/, + true /*isfixed*/, 1 /*origidx*/, + 0 /*partOffs*/)); + // Create SDNode for the swifterror virtual register. + OutVals.push_back( + DAG.getRegister(SwiftError.getOrCreateVRegUseAt( + &I, FuncInfo.MBB, SwiftError.getFunctionArg()), + EVT(TLI.getPointerTy(DL)))); + } + + bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg(); + CallingConv::ID CallConv = + DAG.getMachineFunction().getFunction().getCallingConv(); + Chain = DAG.getTargetLoweringInfo().LowerReturn( + Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); + + // Verify that the target's LowerReturn behaved as expected. 
+ assert(Chain.getNode() && Chain.getValueType() == MVT::Other && + "LowerReturn didn't return a valid chain!"); + + // Update the DAG with the new chain value resulting from return lowering. + DAG.setRoot(Chain); +} + +/// CopyToExportRegsIfNeeded - If the given value has virtual registers +/// created for it, emit nodes to copy the value into the virtual +/// registers. +void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { + // Skip empty types + if (V->getType()->isEmptyTy()) + return; + + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) { + assert(!V->use_empty() && "Unused value assigned virtual registers!"); + CopyValueToVirtualRegister(V, VMI->second); + } +} + +/// ExportFromCurrentBlock - If this condition isn't known to be exported from +/// the current basic block, add it to ValueMap now so that we'll get a +/// CopyTo/FromReg. +void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { + // No need to export constants. + if (!isa<Instruction>(V) && !isa<Argument>(V)) return; + + // Already exported? + if (FuncInfo.isExportedInst(V)) return; + + unsigned Reg = FuncInfo.InitializeRegForValue(V); + CopyValueToVirtualRegister(V, Reg); +} + +bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, + const BasicBlock *FromBB) { + // The operands of the setcc have to be in this block. We don't know + // how to export them from some other block. + if (const Instruction *VI = dyn_cast<Instruction>(V)) { + // Can export from current BB. + if (VI->getParent() == FromBB) + return true; + + // Is already exported, noop. + return FuncInfo.isExportedInst(V); + } + + // If this is an argument, we can export it if the BB is the entry block or + // if it is already exported. + if (isa<Argument>(V)) { + if (FromBB == &FromBB->getParent()->getEntryBlock()) + return true; + + // Otherwise, can only export this if it is already exported. + return FuncInfo.isExportedInst(V); + } + + // Otherwise, constants can always be exported. + return true; +} + +/// Return branch probability calculated by BranchProbabilityInfo for IR blocks. +BranchProbability +SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { + BranchProbabilityInfo *BPI = FuncInfo.BPI; + const BasicBlock *SrcBB = Src->getBasicBlock(); + const BasicBlock *DstBB = Dst->getBasicBlock(); + if (!BPI) { + // If BPI is not available, set the default probability as 1 / N, where N is + // the number of successors. + auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1); + return BranchProbability(1, SuccSize); + } + return BPI->getEdgeProbability(SrcBB, DstBB); +} + +void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src, + MachineBasicBlock *Dst, + BranchProbability Prob) { + if (!FuncInfo.BPI) + Src->addSuccessorWithoutProb(Dst); + else { + if (Prob.isUnknown()) + Prob = getEdgeProbability(Src, Dst); + Src->addSuccessor(Dst, Prob); + } +} + +static bool InBlock(const Value *V, const BasicBlock *BB) { + if (const Instruction *I = dyn_cast<Instruction>(V)) + return I->getParent() == BB; + return true; +} + +/// EmitBranchForMergedCondition - Helper method for FindMergedConditions. +/// This function emits a branch and is used at the leaves of an OR or an +/// AND operator tree. 
+void
+SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
+                                                  MachineBasicBlock *TBB,
+                                                  MachineBasicBlock *FBB,
+                                                  MachineBasicBlock *CurBB,
+                                                  MachineBasicBlock *SwitchBB,
+                                                  BranchProbability TProb,
+                                                  BranchProbability FProb,
+                                                  bool InvertCond) {
+  const BasicBlock *BB = CurBB->getBasicBlock();
+
+  // If the leaf of the tree is a comparison, merge the condition into
+  // the caseblock.
+  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+    // The operands of the cmp have to be in this block. We don't know
+    // how to export them from some other block. If this is the first block
+    // of the sequence, no exporting is needed.
+    if (CurBB == SwitchBB ||
+        (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+         isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
+      ISD::CondCode Condition;
+      if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+        ICmpInst::Predicate Pred =
+            InvertCond ? IC->getInversePredicate() : IC->getPredicate();
+        Condition = getICmpCondCode(Pred);
+      } else {
+        const FCmpInst *FC = cast<FCmpInst>(Cond);
+        FCmpInst::Predicate Pred =
+            InvertCond ? FC->getInversePredicate() : FC->getPredicate();
+        Condition = getFCmpCondCode(Pred);
+        if (TM.Options.NoNaNsFPMath)
+          Condition = getFCmpCodeWithoutNaN(Condition);
+      }
+
+      CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
+                   TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
+      SL->SwitchCases.push_back(CB);
+      return;
+    }
+  }
+
+  // Create a CaseBlock record representing this branch.
+  ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
+  CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
+               nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
+  SL->SwitchCases.push_back(CB);
+}
+
+void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
+                                               MachineBasicBlock *TBB,
+                                               MachineBasicBlock *FBB,
+                                               MachineBasicBlock *CurBB,
+                                               MachineBasicBlock *SwitchBB,
+                                               Instruction::BinaryOps Opc,
+                                               BranchProbability TProb,
+                                               BranchProbability FProb,
+                                               bool InvertCond) {
+  // Skip over nodes that are not part of the tree, and remember to invert the
+  // op and operands at the next level.
+  Value *NotCond;
+  if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
+      InBlock(NotCond, CurBB->getBasicBlock())) {
+    FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+                         !InvertCond);
+    return;
+  }
+
+  const Instruction *BOp = dyn_cast<Instruction>(Cond);
+  // Compute the effective opcode for Cond, taking into account whether it needs
+  // to be inverted, e.g.
+  //   and (not (or A, B)), C
+  // gets lowered as
+  //   and (and (not A, not B), C)
+  unsigned BOpc = 0;
+  if (BOp) {
+    BOpc = BOp->getOpcode();
+    if (InvertCond) {
+      if (BOpc == Instruction::And)
+        BOpc = Instruction::Or;
+      else if (BOpc == Instruction::Or)
+        BOpc = Instruction::And;
+    }
+  }
+
+  // If this node is not part of the or/and tree, emit it as a branch.
+  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+      BOpc != unsigned(Opc) || !BOp->hasOneUse() ||
+      BOp->getParent() != CurBB->getBasicBlock() ||
+      !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+      !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
+                                 TProb, FProb, InvertCond);
+    return;
+  }
+
+  // Create TmpBB after CurBB.
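+  // As a concrete check of the probability bookkeeping below: in the 'or'
+  // case with TProb = FProb = 1/2, CurBB gets (1/4, 3/4) and TmpBB gets the
+  // normalized pair {1/4, 1/2} -> (1/3, 2/3), so the invariant holds:
+  //   1/4 + 3/4 * 1/3 = 1/2 == TProb.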
+ MachineFunction::iterator BBI(CurBB); + MachineFunction &MF = DAG.getMachineFunction(); + MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); + CurBB->getParent()->insert(++BBI, TmpBB); + + if (Opc == Instruction::Or) { + // Codegen X | Y as: + // BB1: + // jmp_if_X TBB + // jmp TmpBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) + // = TrueProb for original BB. + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to + // A/(1+B) and 2B/(1+B). This choice assumes that + // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. + // Another choice is to assume TrueProb for BB1 equals to TrueProb for + // TmpBB, but the math is more complicated. + + auto NewTrueProb = TProb / 2; + auto NewFalseProb = TProb / 2 + FProb; + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, + NewTrueProb, NewFalseProb, InvertCond); + + // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). + SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); + // Emit the RHS condition into TmpBB. + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, + Probs[0], Probs[1], InvertCond); + } else { + assert(Opc == Instruction::And && "Unknown merge op!"); + // Codegen X & Y as: + // BB1: + // jmp_if_X TmpBB + // jmp FBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + // This requires creation of TmpBB after CurBB. + + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) + // = FalseProb for original BB. + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to + // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 == + // TrueProb for BB1 * FalseProb for TmpBB. + + auto NewTrueProb = TProb + FProb / 2; + auto NewFalseProb = FProb / 2; + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, + NewTrueProb, NewFalseProb, InvertCond); + + // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). + SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); + // Emit the RHS condition into TmpBB. + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, + Probs[0], Probs[1], InvertCond); + } +} + +/// If the set of cases should be emitted as a series of branches, return true. +/// If we should emit this as a bunch of and/or'd together conditions, return +/// false. +bool +SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) { + if (Cases.size() != 2) return true; + + // If this is two comparisons of the same values or'd or and'd together, they + // will get folded into a single comparison, so don't emit two blocks. 
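+  // For example, 'if (A < B || B < A)' produces two CaseBlocks comparing the
+  // same two values (with LHS and RHS swapped), which DAG combining folds
+  // into a single setcc, so it is better emitted as one and/or'd condition
+  // than as two branch blocks.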
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS && + Cases[0].CmpRHS == Cases[1].CmpRHS) || + (Cases[0].CmpRHS == Cases[1].CmpLHS && + Cases[0].CmpLHS == Cases[1].CmpRHS)) { + return false; + } + + // Handle: (X != null) | (Y != null) --> (X|Y) != 0 + // Handle: (X == null) & (Y == null) --> (X|Y) == 0 + if (Cases[0].CmpRHS == Cases[1].CmpRHS && + Cases[0].CC == Cases[1].CC && + isa<Constant>(Cases[0].CmpRHS) && + cast<Constant>(Cases[0].CmpRHS)->isNullValue()) { + if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB) + return false; + if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) + return false; + } + + return true; +} + +void SelectionDAGBuilder::visitBr(const BranchInst &I) { + MachineBasicBlock *BrMBB = FuncInfo.MBB; + + // Update machine-CFG edges. + MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; + + if (I.isUnconditional()) { + // Update machine-CFG edges. + BrMBB->addSuccessor(Succ0MBB); + + // If this is not a fall-through branch or optimizations are switched off, + // emit the branch. + if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Succ0MBB))); + + return; + } + + // If this condition is one of the special cases we handle, do special stuff + // now. + const Value *CondVal = I.getCondition(); + MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; + + // If this is a series of conditions that are or'd or and'd together, emit + // this as a sequence of branches instead of setcc's with and/or operations. + // As long as jumps are not expensive, this should improve performance. + // For example, instead of something like: + // cmp A, B + // C = seteq + // cmp D, E + // F = setle + // or C, F + // jnz foo + // Emit: + // cmp A, B + // je foo + // cmp D, E + // jle foo + if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { + Instruction::BinaryOps Opcode = BOp->getOpcode(); + if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && + !I.hasMetadata(LLVMContext::MD_unpredictable) && + (Opcode == Instruction::And || Opcode == Instruction::Or)) { + FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, + Opcode, + getEdgeProbability(BrMBB, Succ0MBB), + getEdgeProbability(BrMBB, Succ1MBB), + /*InvertCond=*/false); + // If the compares in later blocks need to use values not currently + // exported from this block, export them now. This block should always + // be the first entry. + assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); + + // Allow some cases to be rejected. + if (ShouldEmitAsBranches(SL->SwitchCases)) { + for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) { + ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS); + ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS); + } + + // Emit the branch for this block. + visitSwitchCase(SL->SwitchCases[0], BrMBB); + SL->SwitchCases.erase(SL->SwitchCases.begin()); + return; + } + + // Okay, we decided not to do this, remove any inserted MBB's and clear + // SwitchCases. + for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) + FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB); + + SL->SwitchCases.clear(); + } + } + + // Create a CaseBlock record representing this branch. 
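+  // A plain conditional branch is modeled as 'setcc(CondVal, true, SETEQ)';
+  // visitSwitchCase below folds this (X == true) pattern back to X, so no
+  // extra comparison is actually emitted for it.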
+ CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), + nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc()); + + // Use visitSwitchCase to actually insert the fast branch sequence for this + // cond branch. + visitSwitchCase(CB, BrMBB); +} + +/// visitSwitchCase - Emits the necessary code to represent a single node in +/// the binary search tree resulting from lowering a switch instruction. +void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, + MachineBasicBlock *SwitchBB) { + SDValue Cond; + SDValue CondLHS = getValue(CB.CmpLHS); + SDLoc dl = CB.DL; + + if (CB.CC == ISD::SETTRUE) { + // Branch or fall through to TrueBB. + addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb); + SwitchBB->normalizeSuccProbs(); + if (CB.TrueBB != NextBlock(SwitchBB)) { + DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(), + DAG.getBasicBlock(CB.TrueBB))); + } + return; + } + + auto &TLI = DAG.getTargetLoweringInfo(); + EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType()); + + // Build the setcc now. + if (!CB.CmpMHS) { + // Fold "(X == true)" to X and "(X == false)" to !X to + // handle common cases produced by branch lowering. + if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && + CB.CC == ISD::SETEQ) + Cond = CondLHS; + else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && + CB.CC == ISD::SETEQ) { + SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType()); + Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); + } else { + SDValue CondRHS = getValue(CB.CmpRHS); + + // If a pointer's DAG type is larger than its memory type then the DAG + // values are zero-extended. This breaks signed comparisons so truncate + // back to the underlying type before doing the compare. + if (CondLHS.getValueType() != MemVT) { + CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT); + CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT); + } + Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC); + } + } else { + assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); + + const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); + const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); + + SDValue CmpOp = getValue(CB.CmpMHS); + EVT VT = CmpOp.getValueType(); + + if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { + Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT), + ISD::SETLE); + } else { + SDValue SUB = DAG.getNode(ISD::SUB, dl, + VT, CmpOp, DAG.getConstant(Low, dl, VT)); + Cond = DAG.getSetCC(dl, MVT::i1, SUB, + DAG.getConstant(High-Low, dl, VT), ISD::SETULE); + } + } + + // Update successor info + addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb); + // TrueBB and FalseBB are always different unless the incoming IR is + // degenerate. This only happens when running llc on weird IR. + if (CB.TrueBB != CB.FalseBB) + addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb); + SwitchBB->normalizeSuccProbs(); + + // If the lhs block is the next block, invert the condition so that we can + // fall through to the lhs instead of the rhs block. + if (CB.TrueBB == NextBlock(SwitchBB)) { + std::swap(CB.TrueBB, CB.FalseBB); + SDValue True = DAG.getConstant(1, dl, Cond.getValueType()); + Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); + } + + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, + MVT::Other, getControlRoot(), Cond, + DAG.getBasicBlock(CB.TrueBB)); + + // Insert the false branch. 
Do this even if it's a fall-through branch,
+  // this makes it easier to do DAG optimizations which require inverting
+  // the branch condition.
+  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+                       DAG.getBasicBlock(CB.FalseBB));
+
+  DAG.setRoot(BrCond);
+}
+
+/// visitJumpTable - Emit the JumpTable node in the current MBB
+void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
+  // Emit the code for the jump table
+  assert(JT.Reg != -1U && "Should lower JT Header first!");
+  EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
+                                     JT.Reg, PTy);
+  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
+  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
+                                    MVT::Other, Index.getValue(1),
+                                    Table, Index);
+  DAG.setRoot(BrJumpTable);
+}
+
+/// visitJumpTableHeader - This function emits the code necessary to produce
+/// an index into the JumpTable from the switch case value.
+void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
+                                               JumpTableHeader &JTH,
+                                               MachineBasicBlock *SwitchBB) {
+  SDLoc dl = getCurSDLoc();
+
+  // Subtract the lowest switch case value from the value being switched on.
+  SDValue SwitchOp = getValue(JTH.SValue);
+  EVT VT = SwitchOp.getValueType();
+  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
+                            DAG.getConstant(JTH.First, dl, VT));
+
+  // The SDNode we just created, which holds the value being switched on minus
+  // the smallest case value, needs to be copied to a virtual register so it
+  // can be used as an index into the jump table in a subsequent basic block.
+  // This value may be smaller or larger than the target's pointer type, and
+  // may therefore require extension or truncation.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
+
+  unsigned JumpTableReg =
+      FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
+  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
+                                    JumpTableReg, SwitchOp);
+  JT.Reg = JumpTableReg;
+
+  if (!JTH.OmitRangeCheck) {
+    // Emit the range check for the jump table, and branch to the default block
+    // for the switch statement if the value being switched on exceeds the
+    // largest case in the switch.
+    SDValue CMP = DAG.getSetCC(
+        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+                                   Sub.getValueType()),
+        Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
+
+    SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+                                 MVT::Other, CopyTo, CMP,
+                                 DAG.getBasicBlock(JT.Default));
+
+    // Avoid emitting unnecessary branches to the next block.
+    if (JT.MBB != NextBlock(SwitchBB))
+      BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+                           DAG.getBasicBlock(JT.MBB));
+
+    DAG.setRoot(BrCond);
+  } else {
+    // Avoid emitting unnecessary branches to the next block.
+    if (JT.MBB != NextBlock(SwitchBB))
+      DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
+                              DAG.getBasicBlock(JT.MBB)));
+    else
+      DAG.setRoot(CopyTo);
+  }
+}
+
+/// Create a LOAD_STACK_GUARD node, and let it carry the target-specific global
+/// variable if there exists one.
+static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, + SDValue &Chain) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout()); + MachineFunction &MF = DAG.getMachineFunction(); + Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent()); + MachineSDNode *Node = + DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain); + if (Global) { + MachinePointerInfo MPInfo(Global); + auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | + MachineMemOperand::MODereferenceable; + MachineMemOperand *MemRef = MF.getMachineMemOperand( + MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy)); + DAG.setNodeMemRefs(Node, {MemRef}); + } + if (PtrTy != PtrMemTy) + return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy); + return SDValue(Node, 0); +} + +/// Codegen a new tail for a stack protector check ParentMBB which has had its +/// tail spliced into a stack protector check success bb. +/// +/// For a high level explanation of how this fits into the stack protector +/// generation see the comment on the declaration of class +/// StackProtectorDescriptor. +void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB) { + + // First create the loads to the guard/stack slot for the comparison. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout()); + + MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo(); + int FI = MFI.getStackProtectorIndex(); + + SDValue Guard; + SDLoc dl = getCurSDLoc(); + SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); + const Module &M = *ParentBB->getParent()->getFunction().getParent(); + unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext())); + + // Generate code to load the content of the guard slot. + SDValue GuardVal = DAG.getLoad( + PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align, + MachineMemOperand::MOVolatile); + + if (TLI.useStackGuardXorFP()) + GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl); + + // Retrieve guard check function, nullptr if instrumentation is inlined. + if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) { + // The target provides a guard check function to validate the guard value. + // Generate a call to that function with the content of the guard slot as + // argument. + FunctionType *FnTy = GuardCheckFn->getFunctionType(); + assert(FnTy->getNumParams() == 1 && "Invalid function signature"); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = GuardVal; + Entry.Ty = FnTy->getParamType(0); + if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg)) + Entry.IsInReg = true; + Args.push_back(Entry); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()) + .setChain(DAG.getEntryNode()) + .setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(), + getValue(GuardCheckFn), std::move(Args)); + + std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); + DAG.setRoot(Result.second); + return; + } + + // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD. + // Otherwise, emit a volatile load to retrieve the stack guard value. 
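+  // Roughly, the inline check emitted by the remainder of this function is:
+  //   Guard    = <reference stack guard value>
+  //   GuardVal = load of the function's stack protector slot (done above)
+  //   if ((Guard - GuardVal) != 0) br FailureMBB; else br SuccessMBB;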
+  SDValue Chain = DAG.getEntryNode();
+  if (TLI.useLoadStackGuardNode()) {
+    Guard = getLoadStackGuard(DAG, dl, Chain);
+  } else {
+    const Value *IRGuard = TLI.getSDagStackGuard(M);
+    SDValue GuardPtr = getValue(IRGuard);
+
+    Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
+                        MachinePointerInfo(IRGuard, 0), Align,
+                        MachineMemOperand::MOVolatile);
+  }
+
+  // Perform the comparison via a subtract/getSetCC.
+  EVT VT = Guard.getValueType();
+  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal);
+
+  SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
+                                                        *DAG.getContext(),
+                                                        Sub.getValueType()),
+                             Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
+
+  // If the sub is not 0, then we know the guard/stackslot are not equal, so
+  // branch to failure MBB.
+  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+                               MVT::Other, GuardVal.getOperand(0),
+                               Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
+  // Otherwise branch to success MBB.
+  SDValue Br = DAG.getNode(ISD::BR, dl,
+                           MVT::Other, BrCond,
+                           DAG.getBasicBlock(SPD.getSuccessMBB()));
+
+  DAG.setRoot(Br);
+}
+
+/// Codegen the failure basic block for a stack protector check.
+///
+/// A failure stack protector machine basic block consists simply of a call to
+/// __stack_chk_fail().
+///
+/// For a high level explanation of how this fits into the stack protector
+/// generation see the comment on the declaration of class
+/// StackProtectorDescriptor.
+void
+SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  TargetLowering::MakeLibCallOptions CallOptions;
+  CallOptions.setDiscardResult(true);
+  SDValue Chain =
+      TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
+                      None, CallOptions, getCurSDLoc()).second;
+  // On PS4, the "return address" must still be within the calling function,
+  // even if it's at the very end, so emit an explicit TRAP here.
+  // Passing 'true' for doesNotReturn above won't generate the trap for us.
+  if (TM.getTargetTriple().isPS4CPU())
+    Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
+
+  DAG.setRoot(Chain);
+}
+
+/// visitBitTestHeader - This function emits necessary code to produce a value
+/// suitable for "bit tests"
+void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
+                                             MachineBasicBlock *SwitchBB) {
+  SDLoc dl = getCurSDLoc();
+
+  // Subtract the minimum value.
+  SDValue SwitchOp = getValue(B.SValue);
+  EVT VT = SwitchOp.getValueType();
+  SDValue RangeSub =
+      DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT));
+
+  // Determine the type of the test operands.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  bool UsePtrType = false;
+  if (!TLI.isTypeLegal(VT)) {
+    UsePtrType = true;
+  } else {
+    for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
+      if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
+        // Switch table case ranges are encoded into a series of masks.
+        // Just use pointer type, it's guaranteed to fit.
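+        // For example, with B.First == 10, cases 10, 12 and 15 sharing one
+        // target give that target the mask 0b100101: bit N is set iff value
+        // 10+N branches there. visitBitTestCase then tests
+        // ((1 << (SValue - 10)) & Mask) != 0.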
+ UsePtrType = true; + break; + } + } + SDValue Sub = RangeSub; + if (UsePtrType) { + VT = TLI.getPointerTy(DAG.getDataLayout()); + Sub = DAG.getZExtOrTrunc(Sub, dl, VT); + } + + B.RegVT = VT.getSimpleVT(); + B.Reg = FuncInfo.CreateReg(B.RegVT); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub); + + MachineBasicBlock* MBB = B.Cases[0].ThisBB; + + if (!B.OmitRangeCheck) + addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); + addSuccessorWithProb(SwitchBB, MBB, B.Prob); + SwitchBB->normalizeSuccProbs(); + + SDValue Root = CopyTo; + if (!B.OmitRangeCheck) { + // Conditional branch to the default block. + SDValue RangeCmp = DAG.getSetCC(dl, + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + RangeSub.getValueType()), + RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()), + ISD::SETUGT); + + Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp, + DAG.getBasicBlock(B.Default)); + } + + // Avoid emitting unnecessary branches to the next block. + if (MBB != NextBlock(SwitchBB)) + Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB)); + + DAG.setRoot(Root); +} + +/// visitBitTestCase - this function produces one "bit test" +void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, + MachineBasicBlock* NextMBB, + BranchProbability BranchProbToNext, + unsigned Reg, + BitTestCase &B, + MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); + MVT VT = BB.RegVT; + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT); + SDValue Cmp; + unsigned PopCount = countPopulation(B.Mask); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (PopCount == 1) { + // Testing for a single bit; just compare the shift count with what it + // would need to be to shift a 1 bit in that position. + Cmp = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), + ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), + ISD::SETEQ); + } else if (PopCount == BB.Range) { + // There is only one zero bit in the range, test for it directly. + Cmp = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), + ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), + ISD::SETNE); + } else { + // Make desired shift + SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT, + DAG.getConstant(1, dl, VT), ShiftOp); + + // Emit bit tests and jumps + SDValue AndOp = DAG.getNode(ISD::AND, dl, + VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT)); + Cmp = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), + AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE); + } + + // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb. + addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb); + // The branch probability from SwitchBB to NextMBB is BranchProbToNext. + addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext); + // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is + // one as they are relative probabilities (and thus work more like weights), + // and hence we need to normalize them to let the sum of them become one. + SwitchBB->normalizeSuccProbs(); + + SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl, + MVT::Other, getControlRoot(), + Cmp, DAG.getBasicBlock(B.TargetBB)); + + // Avoid emitting unnecessary branches to the next block. 
+ if (NextMBB != NextBlock(SwitchBB)) + BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd, + DAG.getBasicBlock(NextMBB)); + + DAG.setRoot(BrAnd); +} + +void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { + MachineBasicBlock *InvokeMBB = FuncInfo.MBB; + + // Retrieve successors. Look through artificial IR level blocks like + // catchswitch for successors. + MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; + const BasicBlock *EHPadBB = I.getSuccessor(1); + + // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't + // have to do anything here to lower funclet bundles. + assert(!I.hasOperandBundlesOtherThan( + {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && + "Cannot lower invokes with arbitrary operand bundles yet!"); + + const Value *Callee(I.getCalledValue()); + const Function *Fn = dyn_cast<Function>(Callee); + if (isa<InlineAsm>(Callee)) + visitInlineAsm(&I); + else if (Fn && Fn->isIntrinsic()) { + switch (Fn->getIntrinsicID()) { + default: + llvm_unreachable("Cannot invoke this intrinsic"); + case Intrinsic::donothing: + // Ignore invokes to @llvm.donothing: jump directly to the next BB. + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + visitPatchpoint(&I, EHPadBB); + break; + case Intrinsic::experimental_gc_statepoint: + LowerStatepoint(ImmutableStatepoint(&I), EHPadBB); + break; + case Intrinsic::wasm_rethrow_in_catch: { + // This is usually done in visitTargetIntrinsic, but this intrinsic is + // special because it can be invoked, so we manually lower it to a DAG + // node here. + SmallVector<SDValue, 8> Ops; + Ops.push_back(getRoot()); // inchain + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + Ops.push_back( + DAG.getTargetConstant(Intrinsic::wasm_rethrow_in_catch, getCurSDLoc(), + TLI.getPointerTy(DAG.getDataLayout()))); + SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain + DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops)); + break; + } + } + } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) { + // Currently we do not lower any intrinsic calls with deopt operand bundles. + // Eventually we will support lowering the @llvm.experimental.deoptimize + // intrinsic, and right now there are no plans to support other intrinsics + // with deopt state. + LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB); + } else { + LowerCallTo(&I, getValue(Callee), false, EHPadBB); + } + + // If the value of the invoke is used outside of its defining block, make it + // available as a virtual register. + // We already took care of the exported value for the statepoint instruction + // during call to the LowerStatepoint. + if (!isStatepoint(I)) { + CopyToExportRegsIfNeeded(&I); + } + + SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; + BranchProbabilityInfo *BPI = FuncInfo.BPI; + BranchProbability EHPadBBProb = + BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB) + : BranchProbability::getZero(); + findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests); + + // Update successor info. + addSuccessorWithProb(InvokeMBB, Return); + for (auto &UnwindDest : UnwindDests) { + UnwindDest.first->setIsEHPad(); + addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second); + } + InvokeMBB->normalizeSuccProbs(); + + // Drop into normal successor. 
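+  // Only the normal edge gets a DAG node: an unconditional BR to the return
+  // block. The exceptional edges exist only as the MBB successors recorded
+  // above, since transfer to a landing pad happens implicitly during unwind.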
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), + DAG.getBasicBlock(Return))); +} + +void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { + MachineBasicBlock *CallBrMBB = FuncInfo.MBB; + + // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't + // have to do anything here to lower funclet bundles. + assert(!I.hasOperandBundlesOtherThan( + {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && + "Cannot lower callbrs with arbitrary operand bundles yet!"); + + assert(isa<InlineAsm>(I.getCalledValue()) && + "Only know how to handle inlineasm callbr"); + visitInlineAsm(&I); + + // Retrieve successors. + MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()]; + + // Update successor info. + addSuccessorWithProb(CallBrMBB, Return); + for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) { + MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)]; + addSuccessorWithProb(CallBrMBB, Target); + } + CallBrMBB->normalizeSuccProbs(); + + // Drop into default successor. + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Return))); +} + +void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { + llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); +} + +void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { + assert(FuncInfo.MBB->isEHPad() && + "Call to landingpad not in landing pad!"); + + // If there aren't registers to copy the values into (e.g., during SjLj + // exceptions), then don't bother to create these DAG nodes. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn(); + if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 && + TLI.getExceptionSelectorRegister(PersonalityFn) == 0) + return; + + // If landingpad's return type is token type, we don't create DAG nodes + // for its exception pointer and selector value. The extraction of exception + // pointer or selector value from token type landingpads is not currently + // supported. + if (LP.getType()->isTokenTy()) + return; + + SmallVector<EVT, 2> ValueVTs; + SDLoc dl = getCurSDLoc(); + ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs); + assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); + + // Get the two live-in registers as SDValues. The physregs have already been + // copied into virtual registers. + SDValue Ops[2]; + if (FuncInfo.ExceptionPointerVirtReg) { + Ops[0] = DAG.getZExtOrTrunc( + DAG.getCopyFromReg(DAG.getEntryNode(), dl, + FuncInfo.ExceptionPointerVirtReg, + TLI.getPointerTy(DAG.getDataLayout())), + dl, ValueVTs[0]); + } else { + Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout())); + } + Ops[1] = DAG.getZExtOrTrunc( + DAG.getCopyFromReg(DAG.getEntryNode(), dl, + FuncInfo.ExceptionSelectorVirtReg, + TLI.getPointerTy(DAG.getDataLayout())), + dl, ValueVTs[1]); + + // Merge into one. + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(ValueVTs), Ops); + setValue(&LP, Res); +} + +void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, + MachineBasicBlock *Last) { + // Update JTCases. + for (unsigned i = 0, e = SL->JTCases.size(); i != e; ++i) + if (SL->JTCases[i].first.HeaderBB == First) + SL->JTCases[i].first.HeaderBB = Last; + + // Update BitTestCases. 
+ for (unsigned i = 0, e = SL->BitTestCases.size(); i != e; ++i) + if (SL->BitTestCases[i].Parent == First) + SL->BitTestCases[i].Parent = Last; +} + +void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { + MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; + + // Update machine-CFG edges with unique successors. + SmallSet<BasicBlock*, 32> Done; + for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { + BasicBlock *BB = I.getSuccessor(i); + bool Inserted = Done.insert(BB).second; + if (!Inserted) + continue; + + MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; + addSuccessorWithProb(IndirectBrMBB, Succ); + } + IndirectBrMBB->normalizeSuccProbs(); + + DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(), + MVT::Other, getControlRoot(), + getValue(I.getAddress()))); +} + +void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { + if (!DAG.getTarget().Options.TrapUnreachable) + return; + + // We may be able to ignore unreachable behind a noreturn call. + if (DAG.getTarget().Options.NoTrapAfterNoreturn) { + const BasicBlock &BB = *I.getParent(); + if (&I != &BB.front()) { + BasicBlock::const_iterator PredI = + std::prev(BasicBlock::const_iterator(&I)); + if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) { + if (Call->doesNotReturn()) + return; + } + } + } + + DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); +} + +void SelectionDAGBuilder::visitFSub(const User &I) { + // -0.0 - X --> fneg + Type *Ty = I.getType(); + if (isa<Constant>(I.getOperand(0)) && + I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(), + Op2.getValueType(), Op2)); + return; + } + + visitBinary(I, ISD::FSUB); +} + +/// Checks if the given instruction performs a vector reduction, in which case +/// we have the freedom to alter the elements in the result as long as the +/// reduction of them stays unchanged. +static bool isVectorReductionOp(const User *I) { + const Instruction *Inst = dyn_cast<Instruction>(I); + if (!Inst || !Inst->getType()->isVectorTy()) + return false; + + auto OpCode = Inst->getOpcode(); + switch (OpCode) { + case Instruction::Add: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + break; + case Instruction::FAdd: + case Instruction::FMul: + if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) + if (FPOp->getFastMathFlags().isFast()) + break; + LLVM_FALLTHROUGH; + default: + return false; + } + + unsigned ElemNum = Inst->getType()->getVectorNumElements(); + // Ensure the reduction size is a power of 2. + if (!isPowerOf2_32(ElemNum)) + return false; + + unsigned ElemNumToReduce = ElemNum; + + // Do DFS search on the def-use chain from the given instruction. We only + // allow four kinds of operations during the search until we reach the + // instruction that extracts the first element from the vector: + // + // 1. The reduction operation of the same opcode as the given instruction. + // + // 2. PHI node. + // + // 3. ShuffleVector instruction together with a reduction operation that + // does a partial reduction. + // + // 4. ExtractElement that extracts the first element from the vector, and we + // stop searching the def-use chain here. + // + // 3 & 4 above perform a reduction on all elements of the vector. We push defs + // from 1-3 to the stack to continue the DFS. 
The given instruction is not
+  // a reduction operation if we meet any instructions other than those
+  // listed above.
+
+  SmallVector<const User *, 16> UsersToVisit{Inst};
+  SmallPtrSet<const User *, 16> Visited;
+  bool ReduxExtracted = false;
+
+  while (!UsersToVisit.empty()) {
+    auto User = UsersToVisit.back();
+    UsersToVisit.pop_back();
+    if (!Visited.insert(User).second)
+      continue;
+
+    for (const auto &U : User->users()) {
+      auto Inst = dyn_cast<Instruction>(U);
+      if (!Inst)
+        return false;
+
+      if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
+        if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
+          if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast())
+            return false;
+        UsersToVisit.push_back(U);
+      } else if (const ShuffleVectorInst *ShufInst =
+                     dyn_cast<ShuffleVectorInst>(U)) {
+        // Detect the following pattern: A ShuffleVector instruction together
+        // with a reduction that does a partial reduction on the first and
+        // second ElemNumToReduce / 2 elements, and stores the result in
+        // ElemNumToReduce / 2 elements in another vector.
+
+        unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
+        if (ResultElements < ElemNum)
+          return false;
+
+        if (ElemNumToReduce == 1)
+          return false;
+        if (!isa<UndefValue>(U->getOperand(1)))
+          return false;
+        for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
+          if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
+            return false;
+        for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
+          if (ShufInst->getMaskValue(i) != -1)
+            return false;
+
+        // There is only one user of this ShuffleVector instruction, which
+        // must be a reduction operation.
+        if (!U->hasOneUse())
+          return false;
+
+        auto U2 = dyn_cast<Instruction>(*U->user_begin());
+        if (!U2 || U2->getOpcode() != OpCode)
+          return false;
+
+        // Check operands of the reduction operation.
+        if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
+            (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
+          UsersToVisit.push_back(U2);
+          ElemNumToReduce /= 2;
+        } else
+          return false;
+      } else if (isa<ExtractElementInst>(U)) {
+        // At this moment we should have reduced all elements in the vector.
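+        // For example, a 4-wide add reduction accepted by this search is:
+        //   %r1 = shufflevector %v,  undef, <2, 3, undef, undef>
+        //   %s1 = add %v, %r1
+        //   %r2 = shufflevector %s1, undef, <1, undef, undef, undef>
+        //   %s2 = add %s1, %r2
+        //   %x  = extractelement %s2, 0
+        // ElemNumToReduce halves at each shuffle step (4 -> 2 -> 1), so it
+        // must be 1 by the time the first element is extracted.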
+ if (ElemNumToReduce != 1) + return false; + + const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1)); + if (!Val || !Val->isZero()) + return false; + + ReduxExtracted = true; + } else + return false; + } + } + return ReduxExtracted; +} + +void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) { + SDNodeFlags Flags; + + SDValue Op = getValue(I.getOperand(0)); + SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(), + Op, Flags); + setValue(&I, UnNodeValue); +} + +void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { + SDNodeFlags Flags; + if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) { + Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap()); + Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap()); + } + if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) { + Flags.setExact(ExactOp->isExact()); + } + if (isVectorReductionOp(&I)) { + Flags.setVectorReduction(true); + LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n"); + } + + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), + Op1, Op2, Flags); + setValue(&I, BinNodeValue); +} + +void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + + EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy( + Op1.getValueType(), DAG.getDataLayout()); + + // Coerce the shift amount to the right type if we can. + if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { + unsigned ShiftSize = ShiftTy.getSizeInBits(); + unsigned Op2Size = Op2.getValueSizeInBits(); + SDLoc DL = getCurSDLoc(); + + // If the operand is smaller than the shift count type, promote it. + if (ShiftSize > Op2Size) + Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); + + // If the operand is larger than the shift count type but the shift + // count type has enough bits to represent any shift value, truncate + // it now. This is a common case and it exposes the truncate to + // optimization early. + else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits())) + Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); + // Otherwise we'll need to temporarily settle for some other convenient + // type. Type legalization will make adjustments once the shiftee is split. 
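+    // E.g. with an i8 shift-amount type and an i512 shiftee, 8 bits cannot
+    // encode every in-range shift amount (Log2_32_Ceil(512) == 9), so the
+    // amount is truncated to an i32 until legalization splits the shiftee.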
+    else
+      Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
+  }
+
+  bool nuw = false;
+  bool nsw = false;
+  bool exact = false;
+
+  if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
+
+    if (const OverflowingBinaryOperator *OFBinOp =
+            dyn_cast<const OverflowingBinaryOperator>(&I)) {
+      nuw = OFBinOp->hasNoUnsignedWrap();
+      nsw = OFBinOp->hasNoSignedWrap();
+    }
+    if (const PossiblyExactOperator *ExactOp =
+            dyn_cast<const PossiblyExactOperator>(&I))
+      exact = ExactOp->isExact();
+  }
+  SDNodeFlags Flags;
+  Flags.setExact(exact);
+  Flags.setNoSignedWrap(nsw);
+  Flags.setNoUnsignedWrap(nuw);
+  SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
+                            Flags);
+  setValue(&I, Res);
+}
+
+void SelectionDAGBuilder::visitSDiv(const User &I) {
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+
+  SDNodeFlags Flags;
+  Flags.setExact(isa<PossiblyExactOperator>(&I) &&
+                 cast<PossiblyExactOperator>(&I)->isExact());
+  setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
+                           Op2, Flags));
+}
+
+void SelectionDAGBuilder::visitICmp(const User &I) {
+  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+  if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+    predicate = IC->getPredicate();
+  else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+    predicate = ICmpInst::Predicate(IC->getPredicate());
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+  ISD::CondCode Opcode = getICmpCondCode(predicate);
+
+  auto &TLI = DAG.getTargetLoweringInfo();
+  EVT MemVT =
+      TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
+
+  // If a pointer's DAG type is larger than its memory type then the DAG values
+  // are zero-extended. This breaks signed comparisons so truncate back to the
+  // underlying type before doing the compare.
+  if (Op1.getValueType() != MemVT) {
+    Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT);
+    Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT);
+  }
+
+  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+                                                        I.getType());
+  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
+}
+
+void SelectionDAGBuilder::visitFCmp(const User &I) {
+  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+  if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+    predicate = FC->getPredicate();
+  else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+    predicate = FCmpInst::Predicate(FC->getPredicate());
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+
+  ISD::CondCode Condition = getFCmpCondCode(predicate);
+  auto *FPMO = dyn_cast<FPMathOperator>(&I);
+  if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath)
+    Condition = getFCmpCodeWithoutNaN(Condition);
+
+  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+                                                        I.getType());
+  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
+}
+
+// Check if every user of the select's condition is itself a select, so the
+// comparison feeding the condition has no other consumers.
+static bool hasOnlySelectUsers(const Value *Cond) { + return llvm::all_of(Cond->users(), [](const Value *V) { + return isa<SelectInst>(V); + }); +} + +void SelectionDAGBuilder::visitSelect(const User &I) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), + ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + + SmallVector<SDValue, 4> Values(NumValues); + SDValue Cond = getValue(I.getOperand(0)); + SDValue LHSVal = getValue(I.getOperand(1)); + SDValue RHSVal = getValue(I.getOperand(2)); + auto BaseOps = {Cond}; + ISD::NodeType OpCode = Cond.getValueType().isVector() ? + ISD::VSELECT : ISD::SELECT; + + bool IsUnaryAbs = false; + + // Min/max matching is only viable if all output VTs are the same. + if (is_splat(ValueVTs)) { + EVT VT = ValueVTs[0]; + LLVMContext &Ctx = *DAG.getContext(); + auto &TLI = DAG.getTargetLoweringInfo(); + + // We care about the legality of the operation after it has been type + // legalized. + while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal) + VT = TLI.getTypeToTransformTo(Ctx, VT); + + // If the vselect is legal, assume we want to leave this as a vector setcc + + // vselect. Otherwise, if this is going to be scalarized, we want to see if + // min/max is legal on the scalar type. + bool UseScalarMinMax = VT.isVector() && + !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT); + + Value *LHS, *RHS; + auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS); + ISD::NodeType Opc = ISD::DELETED_NODE; + switch (SPR.Flavor) { + case SPF_UMAX: Opc = ISD::UMAX; break; + case SPF_UMIN: Opc = ISD::UMIN; break; + case SPF_SMAX: Opc = ISD::SMAX; break; + case SPF_SMIN: Opc = ISD::SMIN; break; + case SPF_FMINNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: Opc = ISD::FMINIMUM; break; + case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; + case SPNB_RETURNS_ANY: { + if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT)) + Opc = ISD::FMINNUM; + else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT)) + Opc = ISD::FMINIMUM; + else if (UseScalarMinMax) + Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ? + ISD::FMINNUM : ISD::FMINIMUM; + break; + } + } + break; + case SPF_FMAXNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: Opc = ISD::FMAXIMUM; break; + case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; + case SPNB_RETURNS_ANY: + + if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT)) + Opc = ISD::FMAXNUM; + else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT)) + Opc = ISD::FMAXIMUM; + else if (UseScalarMinMax) + Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ? + ISD::FMAXNUM : ISD::FMAXIMUM; + break; + } + break; + case SPF_ABS: + IsUnaryAbs = true; + Opc = ISD::ABS; + break; + case SPF_NABS: + // TODO: we need to produce sub(0, abs(X)). + default: break; + } + + if (!IsUnaryAbs && Opc != ISD::DELETED_NODE && + (TLI.isOperationLegalOrCustom(Opc, VT) || + (UseScalarMinMax && + TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) && + // If the underlying comparison instruction is used by any other + // instruction, the consumed instructions won't be destroyed, so it is + // not profitable to convert to a min/max. 
+ hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) { + OpCode = Opc; + LHSVal = getValue(LHS); + RHSVal = getValue(RHS); + BaseOps = {}; + } + + if (IsUnaryAbs) { + OpCode = Opc; + LHSVal = getValue(LHS); + BaseOps = {}; + } + } + + if (IsUnaryAbs) { + for (unsigned i = 0; i != NumValues; ++i) { + Values[i] = + DAG.getNode(OpCode, getCurSDLoc(), + LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), + SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); + } + } else { + for (unsigned i = 0; i != NumValues; ++i) { + SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end()); + Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); + Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); + Values[i] = DAG.getNode( + OpCode, getCurSDLoc(), + LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops); + } + } + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + DAG.getVTList(ValueVTs), Values)); +} + +void SelectionDAGBuilder::visitTrunc(const User &I) { + // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitZExt(const User &I) { + // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). + // ZExt also can't be a cast to bool for same reason. So, nothing much to do + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitSExt(const User &I) { + // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). + // SExt also can't be a cast to bool for same reason. 
So, nothing much to do + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitFPTrunc(const User &I) { + // FPTrunc is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + SDLoc dl = getCurSDLoc(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, + DAG.getTargetConstant( + 0, dl, TLI.getPointerTy(DAG.getDataLayout())))); +} + +void SelectionDAGBuilder::visitFPExt(const User &I) { + // FPExt is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitFPToUI(const User &I) { + // FPToUI is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitFPToSI(const User &I) { + // FPToSI is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitUIToFP(const User &I) { + // UIToFP is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitSIToFP(const User &I) { + // SIToFP is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitPtrToInt(const User &I) { + // What to do depends on the size of the integer and the size of the pointer. + // We can either truncate, zero extend, or no-op, accordingly. + SDValue N = getValue(I.getOperand(0)); + auto &TLI = DAG.getTargetLoweringInfo(); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + EVT PtrMemVT = + TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType()); + N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT); + N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT); + setValue(&I, N); +} + +void SelectionDAGBuilder::visitIntToPtr(const User &I) { + // What to do depends on the size of the integer and the size of the pointer. + // We can either truncate, zero extend, or no-op, accordingly. 
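+  // E.g. (illustrative): on a target with 32-bit pointers, "inttoptr i64 %x"
+  // truncates, "inttoptr i16 %x" zero-extends, and "inttoptr i32 %x" is a
+  // no-op.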
+ SDValue N = getValue(I.getOperand(0)); + auto &TLI = DAG.getTargetLoweringInfo(); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType()); + N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT); + N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT); + setValue(&I, N); +} + +void SelectionDAGBuilder::visitBitCast(const User &I) { + SDValue N = getValue(I.getOperand(0)); + SDLoc dl = getCurSDLoc(); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + + // BitCast assures us that source and destination are the same size so this is + // either a BITCAST or a no-op. + if (DestVT != N.getValueType()) + setValue(&I, DAG.getNode(ISD::BITCAST, dl, + DestVT, N)); // convert types. + // Check if the original LLVM IR Operand was a ConstantInt, because getValue() + // might fold any kind of constant expression to an integer constant and that + // is not what we are looking for. Only recognize a bitcast of a genuine + // constant integer as an opaque constant. + else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) + setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false, + /*isOpaque*/true)); + else + setValue(&I, N); // noop cast. +} + +void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const Value *SV = I.getOperand(0); + SDValue N = getValue(SV); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + + unsigned SrcAS = SV->getType()->getPointerAddressSpace(); + unsigned DestAS = I.getType()->getPointerAddressSpace(); + + if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS); + + setValue(&I, N); +} + +void SelectionDAGBuilder::visitInsertElement(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue InVec = getValue(I.getOperand(0)); + SDValue InVal = getValue(I.getOperand(1)); + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), + TLI.getVectorIdxTy(DAG.getDataLayout())); + setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), + TLI.getValueType(DAG.getDataLayout(), I.getType()), + InVec, InVal, InIdx)); +} + +void SelectionDAGBuilder::visitExtractElement(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue InVec = getValue(I.getOperand(0)); + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), + TLI.getVectorIdxTy(DAG.getDataLayout())); + setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), + TLI.getValueType(DAG.getDataLayout(), I.getType()), + InVec, InIdx)); +} + +void SelectionDAGBuilder::visitShuffleVector(const User &I) { + SDValue Src1 = getValue(I.getOperand(0)); + SDValue Src2 = getValue(I.getOperand(1)); + Constant *MaskV = cast<Constant>(I.getOperand(2)); + SDLoc DL = getCurSDLoc(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + EVT SrcVT = Src1.getValueType(); + unsigned SrcNumElts = SrcVT.getVectorNumElements(); + + if (MaskV->isNullValue() && VT.isScalableVector()) { + // Canonical splat form of first element of first input vector. 
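+    // E.g. (illustrative) a shufflevector of <vscale x 4 x i32> sources with a
+    // zeroinitializer mask becomes SPLAT_VECTOR(EXTRACT_VECTOR_ELT(Src1, 0)).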
+ SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + SrcVT.getScalarType(), Src1, + DAG.getConstant(0, DL, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt)); + return; + } + + // For now, we only handle splats for scalable vectors. + // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation + // for targets that support a SPLAT_VECTOR for non-scalable vector types. + assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle"); + + SmallVector<int, 8> Mask; + ShuffleVectorInst::getShuffleMask(MaskV, Mask); + unsigned MaskNumElts = Mask.size(); + + if (SrcNumElts == MaskNumElts) { + setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask)); + return; + } + + // Normalize the shuffle vector since mask and vector length don't match. + if (SrcNumElts < MaskNumElts) { + // Mask is longer than the source vectors. We can use concatenate vector to + // make the mask and vectors lengths match. + + if (MaskNumElts % SrcNumElts == 0) { + // Mask length is a multiple of the source vector length. + // Check if the shuffle is some kind of concatenation of the input + // vectors. + unsigned NumConcat = MaskNumElts / SrcNumElts; + bool IsConcat = true; + SmallVector<int, 8> ConcatSrcs(NumConcat, -1); + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + if (Idx < 0) + continue; + // Ensure the indices in each SrcVT sized piece are sequential and that + // the same source is used for the whole piece. + if ((Idx % SrcNumElts != (i % SrcNumElts)) || + (ConcatSrcs[i / SrcNumElts] >= 0 && + ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) { + IsConcat = false; + break; + } + // Remember which source this index came from. + ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; + } + + // The shuffle is concatenating multiple vectors together. Just emit + // a CONCAT_VECTORS operation. + if (IsConcat) { + SmallVector<SDValue, 8> ConcatOps; + for (auto Src : ConcatSrcs) { + if (Src < 0) + ConcatOps.push_back(DAG.getUNDEF(SrcVT)); + else if (Src == 0) + ConcatOps.push_back(Src1); + else + ConcatOps.push_back(Src2); + } + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps)); + return; + } + } + + unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts); + unsigned NumConcat = PaddedMaskNumElts / SrcNumElts; + EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + PaddedMaskNumElts); + + // Pad both vectors with undefs to make them the same length as the mask. + SDValue UndefVal = DAG.getUNDEF(SrcVT); + + SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal); + SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal); + MOps1[0] = Src1; + MOps2[0] = Src2; + + Src1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1); + Src2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2); + + // Readjust mask for new input vector length. + SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1); + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + if (Idx >= (int)SrcNumElts) + Idx -= SrcNumElts - PaddedMaskNumElts; + MappedOps[i] = Idx; + } + + SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps); + + // If the concatenated vector was padded, extract a subvector with the + // correct number of elements. 
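+    // E.g. (illustrative) a 6-element mask over <4 x i32> sources is padded to
+    // 8 lanes: each source is widened to <8 x i32> by concatenation with
+    // undef, shuffled, and the low 6 lanes are extracted below.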
+    if (MaskNumElts != PaddedMaskNumElts)
+      Result = DAG.getNode(
+          ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
+          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+    setValue(&I, Result);
+    return;
+  }
+
+  if (SrcNumElts > MaskNumElts) {
+    // Analyze the access pattern of the vector to see if we can extract
+    // two subvectors and do the shuffle.
+    int StartIdx[2] = { -1, -1 };  // StartIdx to extract from
+    bool CanExtract = true;
+    for (int Idx : Mask) {
+      unsigned Input = 0;
+      if (Idx < 0)
+        continue;
+
+      if (Idx >= (int)SrcNumElts) {
+        Input = 1;
+        Idx -= SrcNumElts;
+      }
+
+      // If all the indices come from the same MaskNumElts sized portion of
+      // the sources we can use extract. Also make sure the extract wouldn't
+      // extract past the end of the source.
+      int NewStartIdx = alignDown(Idx, MaskNumElts);
+      if (NewStartIdx + MaskNumElts > SrcNumElts ||
+          (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
+        CanExtract = false;
+      // Make sure we always update StartIdx as we use it to track if all
+      // elements are undef.
+      StartIdx[Input] = NewStartIdx;
+    }
+
+    if (StartIdx[0] < 0 && StartIdx[1] < 0) {
+      setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
+      return;
+    }
+    if (CanExtract) {
+      // Extract appropriate subvector and generate a vector shuffle
+      for (unsigned Input = 0; Input < 2; ++Input) {
+        SDValue &Src = Input == 0 ? Src1 : Src2;
+        if (StartIdx[Input] < 0)
+          Src = DAG.getUNDEF(VT);
+        else {
+          Src = DAG.getNode(
+              ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
+              DAG.getConstant(StartIdx[Input], DL,
+                              TLI.getVectorIdxTy(DAG.getDataLayout())));
+        }
+      }
+
+      // Calculate new mask.
+      SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
+      for (int &Idx : MappedOps) {
+        if (Idx >= (int)SrcNumElts)
+          Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+        else if (Idx >= 0)
+          Idx -= StartIdx[0];
+      }
+
+      setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
+      return;
+    }
+  }
+
+  // We can't use either concat vectors or extract subvectors, so fall back to
+  // replacing the shuffle with extract and build vector.
+  EVT EltVT = VT.getVectorElementType();
+  EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+  SmallVector<SDValue,8> Ops;
+  for (int Idx : Mask) {
+    SDValue Res;
+
+    if (Idx < 0) {
+      Res = DAG.getUNDEF(EltVT);
+    } else {
+      SDValue &Src = Idx < (int)SrcNumElts ?
Src1 : Src2; + if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; + + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + EltVT, Src, DAG.getConstant(Idx, DL, IdxVT)); + } + + Ops.push_back(Res); + } + + setValue(&I, DAG.getBuildVector(VT, DL, Ops)); +} + +void SelectionDAGBuilder::visitInsertValue(const User &I) { + ArrayRef<unsigned> Indices; + if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I)) + Indices = IV->getIndices(); + else + Indices = cast<ConstantExpr>(&I)->getIndices(); + + const Value *Op0 = I.getOperand(0); + const Value *Op1 = I.getOperand(1); + Type *AggTy = I.getType(); + Type *ValTy = Op1->getType(); + bool IntoUndef = isa<UndefValue>(Op0); + bool FromUndef = isa<UndefValue>(Op1); + + unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 4> AggValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs); + SmallVector<EVT, 4> ValValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); + + unsigned NumAggValues = AggValueVTs.size(); + unsigned NumValValues = ValValueVTs.size(); + SmallVector<SDValue, 4> Values(NumAggValues); + + // Ignore an insertvalue that produces an empty object + if (!NumAggValues) { + setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); + return; + } + + SDValue Agg = getValue(Op0); + unsigned i = 0; + // Copy the beginning value(s) from the original aggregate. + for (; i != LinearIndex; ++i) + Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + // Copy values from the inserted value(s). + if (NumValValues) { + SDValue Val = getValue(Op1); + for (; i != LinearIndex + NumValValues; ++i) + Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); + } + // Copy remaining value(s) from the original aggregate. + for (; i != NumAggValues; ++i) + Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + DAG.getVTList(AggValueVTs), Values)); +} + +void SelectionDAGBuilder::visitExtractValue(const User &I) { + ArrayRef<unsigned> Indices; + if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I)) + Indices = EV->getIndices(); + else + Indices = cast<ConstantExpr>(&I)->getIndices(); + + const Value *Op0 = I.getOperand(0); + Type *AggTy = Op0->getType(); + Type *ValTy = I.getType(); + bool OutOfUndef = isa<UndefValue>(Op0); + + unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 4> ValValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); + + unsigned NumValValues = ValValueVTs.size(); + + // Ignore a extractvalue that produces an empty object + if (!NumValValues) { + setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); + return; + } + + SmallVector<SDValue, 4> Values(NumValValues); + + SDValue Agg = getValue(Op0); + // Copy out the selected value(s). + for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i) + Values[i - LinearIndex] = + OutOfUndef ? + DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + DAG.getVTList(ValValueVTs), Values)); +} + +void SelectionDAGBuilder::visitGetElementPtr(const User &I) { + Value *Op0 = I.getOperand(0); + // Note that the pointer operand may be a vector of pointers. 
Take the scalar + // element which holds a pointer. + unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace(); + SDValue N = getValue(Op0); + SDLoc dl = getCurSDLoc(); + auto &TLI = DAG.getTargetLoweringInfo(); + MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS); + MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS); + + // Normalize Vector GEP - all scalar operands should be converted to the + // splat vector. + unsigned VectorWidth = I.getType()->isVectorTy() ? + I.getType()->getVectorNumElements() : 0; + + if (VectorWidth && !N.getValueType().isVector()) { + LLVMContext &Context = *DAG.getContext(); + EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth); + N = DAG.getSplatBuildVector(VT, dl, N); + } + + for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I); + GTI != E; ++GTI) { + const Value *Idx = GTI.getOperand(); + if (StructType *StTy = GTI.getStructTypeOrNull()) { + unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); + if (Field) { + // N = N + Offset + uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); + + // In an inbounds GEP with an offset that is nonnegative even when + // interpreted as signed, assume there is no unsigned overflow. + SDNodeFlags Flags; + if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds()) + Flags.setNoUnsignedWrap(true); + + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, + DAG.getConstant(Offset, dl, N.getValueType()), Flags); + } + } else { + unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS); + MVT IdxTy = MVT::getIntegerVT(IdxSize); + APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType())); + + // If this is a scalar constant or a splat vector of constants, + // handle it quickly. + const auto *C = dyn_cast<Constant>(Idx); + if (C && isa<VectorType>(C->getType())) + C = C->getSplatValue(); + + if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) { + if (CI->isZero()) + continue; + APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize); + LLVMContext &Context = *DAG.getContext(); + SDValue OffsVal = VectorWidth ? + DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) : + DAG.getConstant(Offs, dl, IdxTy); + + // In an inbounds GEP with an offset that is nonnegative even when + // interpreted as signed, assume there is no unsigned overflow. + SDNodeFlags Flags; + if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds()) + Flags.setNoUnsignedWrap(true); + + OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType()); + + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags); + continue; + } + + // N = N + Idx * ElementSize; + SDValue IdxN = getValue(Idx); + + if (!IdxN.getValueType().isVector() && VectorWidth) { + EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth); + IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); + } + + // If the index is smaller or larger than intptr_t, truncate or extend + // it. + IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); + + // If this is a multiply by a power of two, turn it into a shl + // immediately. This is a very common case. 
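+      // E.g. (illustrative) for "getelementptr i32, i32* %p, i64 %i" the
+      // element size is 4, so the scaled index is emitted as (%i << 2).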
+      if (ElementSize != 1) {
+        if (ElementSize.isPowerOf2()) {
+          unsigned Amt = ElementSize.logBase2();
+          IdxN = DAG.getNode(ISD::SHL, dl,
+                             N.getValueType(), IdxN,
+                             DAG.getConstant(Amt, dl, IdxN.getValueType()));
+        } else {
+          SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl,
+                                          IdxN.getValueType());
+          IdxN = DAG.getNode(ISD::MUL, dl,
+                             N.getValueType(), IdxN, Scale);
+        }
+      }
+
+      N = DAG.getNode(ISD::ADD, dl,
+                      N.getValueType(), N, IdxN);
+    }
+  }
+
+  if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds())
+    N = DAG.getPtrExtendInReg(N, dl, PtrMemTy);
+
+  setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
+  // If this is a fixed sized alloca in the entry block of the function,
+  // allocate it statically on the stack.
+  if (FuncInfo.StaticAllocaMap.count(&I))
+    return;   // getValue will auto-populate this.
+
+  SDLoc dl = getCurSDLoc();
+  Type *Ty = I.getAllocatedType();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  auto &DL = DAG.getDataLayout();
+  uint64_t TySize = DL.getTypeAllocSize(Ty);
+  unsigned Align =
+      std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());
+
+  SDValue AllocSize = getValue(I.getArraySize());
+
+  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
+  if (AllocSize.getValueType() != IntPtr)
+    AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
+
+  AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
+                          AllocSize,
+                          DAG.getConstant(TySize, dl, IntPtr));
+
+  // Handle alignment. If the requested alignment is less than or equal to
+  // the stack alignment, ignore it. If it is greater than the stack
+  // alignment, we note this in the DYNAMIC_STACKALLOC node.
+  unsigned StackAlign =
+      DAG.getSubtarget().getFrameLowering()->getStackAlignment();
+  if (Align <= StackAlign)
+    Align = 0;
+
+  // Round the size of the allocation up to the stack alignment size
+  // by adding SA-1 to the size. This doesn't overflow because we're computing
+  // an address inside an alloca.
+  SDNodeFlags Flags;
+  Flags.setNoUnsignedWrap(true);
+  AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
+                          DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags);
+
+  // Mask out the low bits for alignment purposes.
+  AllocSize =
+      DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
+                  DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr));
+
+  SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)};
+  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
+  setValue(&I, DSA);
+  DAG.setRoot(DSA.getValue(1));
+
+  assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
+}
+
+void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
+  if (I.isAtomic())
+    return visitAtomicLoad(I);
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const Value *SV = I.getOperand(0);
+  if (TLI.supportSwiftError()) {
+    // Swifterror values can come from either a function parameter with
+    // swifterror attribute or an alloca with swifterror attribute.
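+    // E.g. (illustrative):
+    //   define void @f(i8** swifterror %err)   ; swifterror parameter
+    //   %e = alloca swifterror i8*             ; swifterror alloca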
+ if (const Argument *Arg = dyn_cast<Argument>(SV)) { + if (Arg->hasSwiftErrorAttr()) + return visitLoadFromSwiftError(I); + } + + if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { + if (Alloca->isSwiftError()) + return visitLoadFromSwiftError(I); + } + } + + SDValue Ptr = getValue(SV); + + Type *Ty = I.getType(); + + bool isVolatile = I.isVolatile(); + bool isNonTemporal = I.hasMetadata(LLVMContext::MD_nontemporal); + bool isInvariant = I.hasMetadata(LLVMContext::MD_invariant_load); + bool isDereferenceable = + isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout()); + unsigned Alignment = I.getAlignment(); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + SmallVector<EVT, 4> ValueVTs, MemVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + return; + + SDValue Root; + bool ConstantMemory = false; + if (isVolatile || NumValues > MaxParallelChains) + // Serialize volatile loads with other side effects. + Root = getRoot(); + else if (AA && + AA->pointsToConstantMemory(MemoryLocation( + SV, + LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)), + AAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + Root = DAG.getEntryNode(); + ConstantMemory = true; + } else { + // Do not serialize non-volatile loads against each other. + Root = DAG.getRoot(); + } + + SDLoc dl = getCurSDLoc(); + + if (isVolatile) + Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); + + // An aggregate load cannot wrap around the address space, so offsets to its + // parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + + SmallVector<SDValue, 4> Values(NumValues); + SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); + EVT PtrVT = Ptr.getValueType(); + unsigned ChainI = 0; + for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { + // Serializing loads here may result in excessive register pressure, and + // TokenFactor places arbitrary choke points on the scheduler. SD scheduling + // could recover a bit by hoisting nodes upward in the chain by recognizing + // they are side-effect free or do not alias. The optimizer should really + // avoid this case by converting large object/array copies to llvm.memcpy + // (MaxParallelChains should always remain as failsafe). 
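+    // E.g. (illustrative) with MaxParallelChains at its default of 64, a load
+    // of [256 x i32] is emitted as four batches of 64 part-loads, each batch
+    // closed off by a TokenFactor below.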
+ if (ChainI == MaxParallelChains) { + assert(PendingLoads.empty() && "PendingLoads must be serialized first"); + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(Chains.data(), ChainI)); + Root = Chain; + ChainI = 0; + } + SDValue A = DAG.getNode(ISD::ADD, dl, + PtrVT, Ptr, + DAG.getConstant(Offsets[i], dl, PtrVT), + Flags); + auto MMOFlags = MachineMemOperand::MONone; + if (isVolatile) + MMOFlags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + MMOFlags |= MachineMemOperand::MONonTemporal; + if (isInvariant) + MMOFlags |= MachineMemOperand::MOInvariant; + if (isDereferenceable) + MMOFlags |= MachineMemOperand::MODereferenceable; + MMOFlags |= TLI.getMMOFlags(I); + + SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A, + MachinePointerInfo(SV, Offsets[i]), Alignment, + MMOFlags, AAInfo, Ranges); + Chains[ChainI] = L.getValue(1); + + if (MemVTs[i] != ValueVTs[i]) + L = DAG.getZExtOrTrunc(L, dl, ValueVTs[i]); + + Values[i] = L; + } + + if (!ConstantMemory) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(Chains.data(), ChainI)); + if (isVolatile) + DAG.setRoot(Chain); + else + PendingLoads.push_back(Chain); + } + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(ValueVTs), Values)); +} + +void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { + assert(DAG.getTargetLoweringInfo().supportSwiftError() && + "call visitStoreToSwiftError when backend supports swifterror"); + + SmallVector<EVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + const Value *SrcV = I.getOperand(0); + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), + SrcV->getType(), ValueVTs, &Offsets); + assert(ValueVTs.size() == 1 && Offsets[0] == 0 && + "expect a single EVT for swifterror"); + + SDValue Src = getValue(SrcV); + // Create a virtual register, then update the virtual register. + Register VReg = + SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand()); + // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue + // Chain can be getRoot or getControlRoot. 
+ SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg, + SDValue(Src.getNode(), Src.getResNo())); + DAG.setRoot(CopyNode); +} + +void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { + assert(DAG.getTargetLoweringInfo().supportSwiftError() && + "call visitLoadFromSwiftError when backend supports swifterror"); + + assert(!I.isVolatile() && + !I.hasMetadata(LLVMContext::MD_nontemporal) && + !I.hasMetadata(LLVMContext::MD_invariant_load) && + "Support volatile, non temporal, invariant for load_from_swift_error"); + + const Value *SV = I.getOperand(0); + Type *Ty = I.getType(); + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + assert( + (!AA || + !AA->pointsToConstantMemory(MemoryLocation( + SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)), + AAInfo))) && + "load_from_swift_error should not be constant memory"); + + SmallVector<EVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty, + ValueVTs, &Offsets); + assert(ValueVTs.size() == 1 && Offsets[0] == 0 && + "expect a single EVT for swifterror"); + + // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT + SDValue L = DAG.getCopyFromReg( + getRoot(), getCurSDLoc(), + SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]); + + setValue(&I, L); +} + +void SelectionDAGBuilder::visitStore(const StoreInst &I) { + if (I.isAtomic()) + return visitAtomicStore(I); + + const Value *SrcV = I.getOperand(0); + const Value *PtrV = I.getOperand(1); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.supportSwiftError()) { + // Swifterror values can come from either a function parameter with + // swifterror attribute or an alloca with swifterror attribute. + if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { + if (Arg->hasSwiftErrorAttr()) + return visitStoreToSwiftError(I); + } + + if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { + if (Alloca->isSwiftError()) + return visitStoreToSwiftError(I); + } + } + + SmallVector<EVT, 4> ValueVTs, MemVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), + SrcV->getType(), ValueVTs, &MemVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + return; + + // Get the lowered operands. Note that we do this after + // checking if NumResults is zero, because with zero results + // the operands won't have values in the map. + SDValue Src = getValue(SrcV); + SDValue Ptr = getValue(PtrV); + + SDValue Root = getRoot(); + SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); + SDLoc dl = getCurSDLoc(); + EVT PtrVT = Ptr.getValueType(); + unsigned Alignment = I.getAlignment(); + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + auto MMOFlags = MachineMemOperand::MONone; + if (I.isVolatile()) + MMOFlags |= MachineMemOperand::MOVolatile; + if (I.hasMetadata(LLVMContext::MD_nontemporal)) + MMOFlags |= MachineMemOperand::MONonTemporal; + MMOFlags |= TLI.getMMOFlags(I); + + // An aggregate load cannot wrap around the address space, so offsets to its + // parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + + unsigned ChainI = 0; + for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { + // See visitLoad comments. 
+ if (ChainI == MaxParallelChains) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(Chains.data(), ChainI)); + Root = Chain; + ChainI = 0; + } + SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, + DAG.getConstant(Offsets[i], dl, PtrVT), Flags); + SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i); + if (MemVTs[i] != ValueVTs[i]) + Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]); + SDValue St = + DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]), + Alignment, MMOFlags, AAInfo); + Chains[ChainI] = St; + } + + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(Chains.data(), ChainI)); + DAG.setRoot(StoreNode); +} + +void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, + bool IsCompressing) { + SDLoc sdl = getCurSDLoc(); + + auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + unsigned& Alignment) { + // llvm.masked.store.*(Src0, Ptr, alignment, Mask) + Src0 = I.getArgOperand(0); + Ptr = I.getArgOperand(1); + Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue(); + Mask = I.getArgOperand(3); + }; + auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + unsigned& Alignment) { + // llvm.masked.compressstore.*(Src0, Ptr, Mask) + Src0 = I.getArgOperand(0); + Ptr = I.getArgOperand(1); + Mask = I.getArgOperand(2); + Alignment = 0; + }; + + Value *PtrOperand, *MaskOperand, *Src0Operand; + unsigned Alignment; + if (IsCompressing) + getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); + else + getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); + + SDValue Ptr = getValue(PtrOperand); + SDValue Src0 = getValue(Src0Operand); + SDValue Mask = getValue(MaskOperand); + + EVT VT = Src0.getValueType(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOStore, VT.getStoreSize(), + Alignment, AAInfo); + SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, + MMO, false /* Truncating */, + IsCompressing); + DAG.setRoot(StoreNode); + setValue(&I, StoreNode); +} + +// Get a uniform base for the Gather/Scatter intrinsic. +// The first argument of the Gather/Scatter intrinsic is a vector of pointers. +// We try to represent it as a base pointer + vector of indices. +// Usually, the vector of pointers comes from a 'getelementptr' instruction. +// The first operand of the GEP may be a single pointer or a vector of pointers +// Example: +// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind +// or +// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind +// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, .. +// +// When the first GEP operand is a single pointer - it is the uniform base we +// are looking for. If first operand of the GEP is a splat vector - we +// extract the splat value and use it as a uniform base. +// In all other cases the function returns 'false'. 
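+// For the splat case (illustrative):
+//   %bc  = insertelement <8 x i32*> undef, i32* %ptr, i32 0
+//   %vp  = shufflevector <8 x i32*> %bc, <8 x i32*> undef, <8 x i32> zeroinitializer
+//   %gep.ptr = getelementptr i32, <8 x i32*> %vp, <8 x i32> %ind
+// getSplatValue(%vp) recovers %ptr, which becomes the uniform base.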
+static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index,
+                           ISD::MemIndexType &IndexType, SDValue &Scale,
+                           SelectionDAGBuilder *SDB) {
+  SelectionDAG& DAG = SDB->DAG;
+  LLVMContext &Context = *DAG.getContext();
+
+  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
+  const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+  if (!GEP)
+    return false;
+
+  const Value *GEPPtr = GEP->getPointerOperand();
+  if (!GEPPtr->getType()->isVectorTy())
+    Ptr = GEPPtr;
+  else if (!(Ptr = getSplatValue(GEPPtr)))
+    return false;
+
+  unsigned FinalIndex = GEP->getNumOperands() - 1;
+  Value *IndexVal = GEP->getOperand(FinalIndex);
+
+  // Ensure all the other indices are 0.
+  for (unsigned i = 1; i < FinalIndex; ++i) {
+    auto *C = dyn_cast<Constant>(GEP->getOperand(i));
+    if (!C)
+      return false;
+    if (isa<VectorType>(C->getType()))
+      C = C->getSplatValue();
+    auto *CI = dyn_cast_or_null<ConstantInt>(C);
+    if (!CI || !CI->isZero())
+      return false;
+  }
+
+  // The operands of the GEP may be defined in another basic block.
+  // In this case we'll not find nodes for the operands.
+  if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
+    return false;
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const DataLayout &DL = DAG.getDataLayout();
+  Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()),
+                                SDB->getCurSDLoc(), TLI.getPointerTy(DL));
+  Base = SDB->getValue(Ptr);
+  Index = SDB->getValue(IndexVal);
+  IndexType = ISD::SIGNED_SCALED;
+
+  if (!Index.getValueType().isVector()) {
+    unsigned GEPWidth = GEP->getType()->getVectorNumElements();
+    EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
+    Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
+  }
+  return true;
+}
+
+void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
+  SDLoc sdl = getCurSDLoc();
+
+  // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
+  const Value *Ptr = I.getArgOperand(1);
+  SDValue Src0 = getValue(I.getArgOperand(0));
+  SDValue Mask = getValue(I.getArgOperand(3));
+  EVT VT = Src0.getValueType();
+  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
+  if (!Alignment)
+    Alignment = DAG.getEVTAlignment(VT);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  AAMDNodes AAInfo;
+  I.getAAMetadata(AAInfo);
+
+  SDValue Base;
+  SDValue Index;
+  ISD::MemIndexType IndexType;
+  SDValue Scale;
+  const Value *BasePtr = Ptr;
+  bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale,
+                                    this);
+
+  const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
+  MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), + MachineMemOperand::MOStore, VT.getStoreSize(), + Alignment, AAInfo); + if (!UniformBase) { + Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); + Index = getValue(Ptr); + IndexType = ISD::SIGNED_SCALED; + Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); + } + SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale }; + SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, + Ops, MMO, IndexType); + DAG.setRoot(Scatter); + setValue(&I, Scatter); +} + +void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { + SDLoc sdl = getCurSDLoc(); + + auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + unsigned& Alignment) { + // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) + Ptr = I.getArgOperand(0); + Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); + Mask = I.getArgOperand(2); + Src0 = I.getArgOperand(3); + }; + auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + unsigned& Alignment) { + // @llvm.masked.expandload.*(Ptr, Mask, Src0) + Ptr = I.getArgOperand(0); + Alignment = 0; + Mask = I.getArgOperand(1); + Src0 = I.getArgOperand(2); + }; + + Value *PtrOperand, *MaskOperand, *Src0Operand; + unsigned Alignment; + if (IsExpanding) + getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); + else + getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); + + SDValue Ptr = getValue(PtrOperand); + SDValue Src0 = getValue(Src0Operand); + SDValue Mask = getValue(MaskOperand); + + EVT VT = Src0.getValueType(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + // Do not serialize masked loads of constant memory with anything. + bool AddToChain = + !AA || !AA->pointsToConstantMemory(MemoryLocation( + PtrOperand, + LocationSize::precise( + DAG.getDataLayout().getTypeStoreSize(I.getType())), + AAInfo)); + SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); + + MachineMemOperand *MMO = + DAG.getMachineFunction(). 
+ getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); + + SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, + ISD::NON_EXTLOAD, IsExpanding); + if (AddToChain) + PendingLoads.push_back(Load.getValue(1)); + setValue(&I, Load); +} + +void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0) + const Value *Ptr = I.getArgOperand(0); + SDValue Src0 = getValue(I.getArgOperand(3)); + SDValue Mask = getValue(I.getArgOperand(2)); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + SDValue Root = DAG.getRoot(); + SDValue Base; + SDValue Index; + ISD::MemIndexType IndexType; + SDValue Scale; + const Value *BasePtr = Ptr; + bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale, + this); + bool ConstantMemory = false; + if (UniformBase && AA && + AA->pointsToConstantMemory( + MemoryLocation(BasePtr, + LocationSize::precise( + DAG.getDataLayout().getTypeStoreSize(I.getType())), + AAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + Root = DAG.getEntryNode(); + ConstantMemory = true; + } + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); + + if (!UniformBase) { + Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); + Index = getValue(Ptr); + IndexType = ISD::SIGNED_SCALED; + Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); + } + SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale }; + SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, + Ops, MMO, IndexType); + + SDValue OutChain = Gather.getValue(1); + if (!ConstantMemory) + PendingLoads.push_back(OutChain); + setValue(&I, Gather); +} + +void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { + SDLoc dl = getCurSDLoc(); + AtomicOrdering SuccessOrdering = I.getSuccessOrdering(); + AtomicOrdering FailureOrdering = I.getFailureOrdering(); + SyncScope::ID SSID = I.getSyncScopeID(); + + SDValue InChain = getRoot(); + + MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType(); + SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other); + + auto Alignment = DAG.getEVTAlignment(MemVT); + + auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + if (I.isVolatile()) + Flags |= MachineMemOperand::MOVolatile; + Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), + Flags, MemVT.getStoreSize(), Alignment, + AAMDNodes(), nullptr, SSID, SuccessOrdering, + FailureOrdering); + + SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, + dl, MemVT, VTs, InChain, + getValue(I.getPointerOperand()), + getValue(I.getCompareOperand()), + getValue(I.getNewValOperand()), MMO); + + SDValue OutChain = L.getValue(2); + + setValue(&I, L); + DAG.setRoot(OutChain); +} + +void 
SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { + SDLoc dl = getCurSDLoc(); + ISD::NodeType NT; + switch (I.getOperation()) { + default: llvm_unreachable("Unknown atomicrmw operation"); + case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break; + case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break; + case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break; + case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break; + case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break; + case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break; + case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break; + case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break; + case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break; + case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break; + case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; + case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break; + case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break; + } + AtomicOrdering Ordering = I.getOrdering(); + SyncScope::ID SSID = I.getSyncScopeID(); + + SDValue InChain = getRoot(); + + auto MemVT = getValue(I.getValOperand()).getSimpleValueType(); + auto Alignment = DAG.getEVTAlignment(MemVT); + + auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + if (I.isVolatile()) + Flags |= MachineMemOperand::MOVolatile; + Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags, + MemVT.getStoreSize(), Alignment, AAMDNodes(), + nullptr, SSID, Ordering); + + SDValue L = + DAG.getAtomic(NT, dl, MemVT, InChain, + getValue(I.getPointerOperand()), getValue(I.getValOperand()), + MMO); + + SDValue OutChain = L.getValue(1); + + setValue(&I, L); + DAG.setRoot(OutChain); +} + +void SelectionDAGBuilder::visitFence(const FenceInst &I) { + SDLoc dl = getCurSDLoc(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Ops[3]; + Ops[0] = getRoot(); + Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, + TLI.getFenceOperandTy(DAG.getDataLayout())); + Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl, + TLI.getFenceOperandTy(DAG.getDataLayout())); + DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); +} + +void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { + SDLoc dl = getCurSDLoc(); + AtomicOrdering Order = I.getOrdering(); + SyncScope::ID SSID = I.getSyncScopeID(); + + SDValue InChain = getRoot(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType()); + + if (!TLI.supportsUnalignedAtomics() && + I.getAlignment() < MemVT.getSizeInBits() / 8) + report_fatal_error("Cannot generate unaligned atomic load"); + + auto Flags = MachineMemOperand::MOLoad; + if (I.isVolatile()) + Flags |= MachineMemOperand::MOVolatile; + if (I.hasMetadata(LLVMContext::MD_invariant_load)) + Flags |= MachineMemOperand::MOInvariant; + if (isDereferenceablePointer(I.getPointerOperand(), I.getType(), + DAG.getDataLayout())) + Flags |= MachineMemOperand::MODereferenceable; + + Flags |= TLI.getMMOFlags(I); + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), + Flags, MemVT.getStoreSize(), + I.getAlignment() ? 
I.getAlignment() :
+                                          DAG.getEVTAlignment(MemVT),
+                                          AAMDNodes(), nullptr, SSID, Order);
+
+  InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
+
+  SDValue Ptr = getValue(I.getPointerOperand());
+
+  if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
+    // TODO: Once this is better exercised by tests, it should be merged with
+    // the normal path for loads to prevent future divergence.
+    SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
+    if (MemVT != VT)
+      L = DAG.getPtrExtOrTrunc(L, dl, VT);
+
+    setValue(&I, L);
+    SDValue OutChain = L.getValue(1);
+    if (!I.isUnordered())
+      DAG.setRoot(OutChain);
+    else
+      PendingLoads.push_back(OutChain);
+    return;
+  }
+
+  SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
+                            Ptr, MMO);
+
+  SDValue OutChain = L.getValue(1);
+  if (MemVT != VT)
+    L = DAG.getPtrExtOrTrunc(L, dl, VT);
+
+  setValue(&I, L);
+  DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
+  SDLoc dl = getCurSDLoc();
+
+  AtomicOrdering Ordering = I.getOrdering();
+  SyncScope::ID SSID = I.getSyncScopeID();
+
+  SDValue InChain = getRoot();
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT MemVT =
+      TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
+
+  if (I.getAlignment() < MemVT.getSizeInBits() / 8)
+    report_fatal_error("Cannot generate unaligned atomic store");
+
+  auto Flags = MachineMemOperand::MOStore;
+  if (I.isVolatile())
+    Flags |= MachineMemOperand::MOVolatile;
+  Flags |= TLI.getMMOFlags(I);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
+                              MemVT.getStoreSize(), I.getAlignment(), AAMDNodes(),
+                              nullptr, SSID, Ordering);
+
+  SDValue Val = getValue(I.getValueOperand());
+  if (Val.getValueType() != MemVT)
+    Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
+  SDValue Ptr = getValue(I.getPointerOperand());
+
+  if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
+    // TODO: Once this is better exercised by tests, it should be merged with
+    // the normal path for stores to prevent future divergence.
+    SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
+    DAG.setRoot(S);
+    return;
+  }
+  SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
+                                   Ptr, Val, MMO);
+
+  DAG.setRoot(OutChain);
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+                                               unsigned Intrinsic) {
+  // Ignore the callsite's attributes. A specific call site may be marked with
+  // readnone, but the lowering code will expect the chain based on the
+  // definition.
+  const Function *F = I.getCalledFunction();
+  bool HasChain = !F->doesNotAccessMemory();
+  bool OnlyLoad = HasChain && F->onlyReadsMemory();
+
+  // Build the operand list.
+  SmallVector<SDValue, 8> Ops;
+  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
+    if (OnlyLoad) {
+      // We don't need to serialize loads against other loads.
+      Ops.push_back(DAG.getRoot());
+    } else {
+      Ops.push_back(getRoot());
+    }
+  }
+
+  // Info is set by getTgtMemIntrinsic.
+  TargetLowering::IntrinsicInfo Info;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
+                                               DAG.getMachineFunction(),
+                                               Intrinsic);
+
+  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
+ if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || + Info.opc == ISD::INTRINSIC_W_CHAIN) + Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), + TLI.getPointerTy(DAG.getDataLayout()))); + + // Add all operands of the call to the operand list. + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + const Value *Arg = I.getArgOperand(i); + if (!I.paramHasAttr(i, Attribute::ImmArg)) { + Ops.push_back(getValue(Arg)); + continue; + } + + // Use TargetConstant instead of a regular constant for immarg. + EVT VT = TLI.getValueType(*DL, Arg->getType(), true); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) { + assert(CI->getBitWidth() <= 64 && + "large intrinsic immediates not handled"); + Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT)); + } else { + Ops.push_back( + DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT)); + } + } + + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); + + if (HasChain) + ValueVTs.push_back(MVT::Other); + + SDVTList VTs = DAG.getVTList(ValueVTs); + + // Create the node. + SDValue Result; + if (IsTgtIntrinsic) { + // This is target intrinsic that touches memory + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + Result = DAG.getMemIntrinsicNode( + Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), + Info.align ? Info.align->value() : 0, Info.flags, Info.size, AAInfo); + } else if (!HasChain) { + Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); + } else if (!I.getType()->isVoidTy()) { + Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops); + } else { + Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); + } + + if (HasChain) { + SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); + if (OnlyLoad) + PendingLoads.push_back(Chain); + else + DAG.setRoot(Chain); + } + + if (!I.getType()->isVoidTy()) { + if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { + EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy); + Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); + } else + Result = lowerRangeToAssertZExt(DAG, I, Result); + + setValue(&I, Result); + } +} + +/// GetSignificand - Get the significand and build it into a floating-point +/// number with exponent of 1: +/// +/// Op = (Op & 0x007fffff) | 0x3f800000; +/// +/// where Op is the hexadecimal representation of floating point value. +static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) { + SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, + DAG.getConstant(0x007fffff, dl, MVT::i32)); + SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, + DAG.getConstant(0x3f800000, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); +} + +/// GetExponent - Get the exponent: +/// +/// (float)(int)(((Op & 0x7f800000) >> 23) - 127); +/// +/// where Op is the hexadecimal representation of floating point value. +static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, + const TargetLowering &TLI, const SDLoc &dl) { + SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, + DAG.getConstant(0x7f800000, dl, MVT::i32)); + SDValue t1 = DAG.getNode( + ISD::SRL, dl, MVT::i32, t0, + DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout()))); + SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, + DAG.getConstant(127, dl, MVT::i32)); + return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); +} + +/// getF32Constant - Get 32-bit floating point constant. 
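+/// For example (illustrative), getF32Constant(DAG, 0x3f800000, dl) builds the
+/// f32 constant 1.0f, since 0x3f800000 is the IEEE-754 bit pattern of 1.0f.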
+static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
+                              const SDLoc &dl) {
+  return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
+                           MVT::f32);
+}
+
+static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
+                                       SelectionDAG &DAG) {
+  // TODO: What fast-math-flags should be set on the floating-point nodes?
+
+  //   IntegerPartOfX = (int32_t)t0;
+  SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+  //   FractionalPartOfX = t0 - (float)IntegerPartOfX;
+  SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+  SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+  //   IntegerPartOfX <<= 23;
+  IntegerPartOfX = DAG.getNode(
+      ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+      DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
+                                  DAG.getDataLayout())));
+
+  SDValue TwoToFractionalPartOfX;
+  if (LimitFloatPrecision <= 6) {
+    // For floating-point precision of 6:
+    //
+    //   TwoToFractionalPartOfX =
+    //     0.997535578f +
+    //       (0.735607626f + 0.252464424f * x) * x;
+    //
+    // error 0.0144103317, which is 6 bits
+    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                             getF32Constant(DAG, 0x3e814304, dl));
+    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                             getF32Constant(DAG, 0x3f3c50c8, dl));
+    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                                         getF32Constant(DAG, 0x3f7f5e7e, dl));
+  } else if (LimitFloatPrecision <= 12) {
+    // For floating-point precision of 12:
+    //
+    //   TwoToFractionalPartOfX =
+    //     0.999892986f +
+    //       (0.696457318f +
+    //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
+    //
+    // error 0.000107046256, which is 13 to 14 bits
+    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                             getF32Constant(DAG, 0x3da235e3, dl));
+    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                             getF32Constant(DAG, 0x3e65b8f3, dl));
+    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+    SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                             getF32Constant(DAG, 0x3f324b07, dl));
+    SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                                         getF32Constant(DAG, 0x3f7ff8fd, dl));
+  } else { // LimitFloatPrecision <= 18
+    // For floating-point precision of 18:
+    //
+    //   TwoToFractionalPartOfX =
+    //     0.999999982f +
+    //       (0.693148872f +
+    //         (0.240227044f +
+    //           (0.554906021e-1f +
+    //             (0.961591928e-2f +
+    //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+    // error 2.47208000*10^(-7), which is better than 18 bits
+    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                             getF32Constant(DAG, 0x3924b03e, dl));
+    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                             getF32Constant(DAG, 0x3ab24b87, dl));
+    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+    SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                             getF32Constant(DAG, 0x3c1d8c17, dl));
+    SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+    SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                             getF32Constant(DAG, 0x3d634a1d, dl));
+    SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+    SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+                             getF32Constant(DAG, 0x3e75fe14, dl));
+    SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+    SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+                              getF32Constant(DAG, 0x3f317234, dl));
+    SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000, dl)); + } + + // Add the exponent into the result in integer domain. + SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX)); +} + +/// expandExp - Lower an exp intrinsic. Handles the special sequences for +/// limited-precision mode. +static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + + // Put the exponent in the right bit position for later addition to the + // final result: + // + // t0 = Op * log2(e) + + // TODO: What fast-math-flags should be set here? + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, + DAG.getConstantFP(numbers::log2ef, dl, MVT::f32)); + return getLimitedPrecisionExp2(t0, dl, DAG); + } + + // No special expansion. + return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); +} + +/// expandLog - Lower a log intrinsic. Handles the special sequences for +/// limited-precision mode. +static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + // TODO: What fast-math-flags should be set on the floating-point nodes? + + if (Op.getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); + + // Scale the exponent by log(2). + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); + SDValue LogOfExponent = + DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + DAG.getConstantFP(numbers::ln2f, dl, MVT::f32)); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + SDValue LogOfMantissa; + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // LogofMantissa = + // -1.1609546f + + // (1.4034025f - 0.23903021f * x) * x; + // + // error 0.0034276066, which is better than 8 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbe74c456, dl)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3fb3a2b1, dl)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f949a29, dl)); + } else if (LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // LogOfMantissa = + // -1.7417939f + + // (2.8212026f + + // (-1.4699568f + + // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; + // + // error 0.000061011436, which is 14 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbd67b6d6, dl)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3ee4f4b8, dl)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fbc278b, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40348e95, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3fdef31a, dl)); + } else { // LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // LogOfMantissa = + // -2.1072184f + + // (4.2372794f + + // (-3.7029485f + + // (2.2781945f + + // (-0.87823314f + 
+ // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; + // + // error 0.0000023660568, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbc91e5ac, dl)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e4350aa, dl)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f60d3e3, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x4011cdf0, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x406cfd1c, dl)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x408797cb, dl)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4006dcab, dl)); + } + + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); + } + + // No special expansion. + return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op); +} + +/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for +/// limited-precision mode. +static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + // TODO: What fast-math-flags should be set on the floating-point nodes? + + if (Op.getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); + + // Get the exponent. + SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + // Different possible minimax approximations of significand in + // floating-point for various degrees of accuracy over [1,2]. 
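+    // Each one is a polynomial evaluated in Horner form, e.g. the 6-bit
+    // variant computes ((c2 * x) + c1) * x + c0, with every coefficient
+    // materialized from its IEEE-754 bit pattern via getF32Constant.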
+ SDValue Log2ofMantissa; + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; + // + // error 0.0049451742, which is more than 7 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbeb08fe0, dl)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x40019463, dl)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fd6633d, dl)); + } else if (LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // Log2ofMantissa = + // -2.51285454f + + // (4.07009056f + + // (-2.12067489f + + // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; + // + // error 0.0000876136000, which is better than 13 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbda7262e, dl)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3f25280b, dl)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x4007b923, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40823e2f, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x4020d29c, dl)); + } else { // LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // Log2ofMantissa = + // -3.0400495f + + // (6.1129976f + + // (-5.3420409f + + // (3.2865683f + + // (-1.2669343f + + // (0.27515199f - + // 0.25691327e-1f * x) * x) * x) * x) * x) * x; + // + // error 0.0000018516, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbcd2769e, dl)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e8ce0b9, dl)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fa22ae7, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40525723, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x40aaf200, dl)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x40c39dad, dl)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4042902c, dl)); + } + + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); + } + + // No special expansion. + return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op); +} + +/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for +/// limited-precision mode. +static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + // TODO: What fast-math-flags should be set on the floating-point nodes? + + if (Op.getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); + + // Scale the exponent by log10(2) [0.30102999f]. 
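+    // Since x = 2^Exp * Mantissa, log10(x) = Exp*log10(2) + log10(Mantissa);
+    // the two terms are computed separately and summed at the end.
+    // (0x3e9a209a below is the bit pattern of 0.30102999f.)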
+    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+                                        getF32Constant(DAG, 0x3e9a209a, dl));
+
+    // Get the significand and build it into a floating-point number with
+    // an exponent of 1.
+    SDValue X = GetSignificand(DAG, Op1, dl);
+
+    SDValue Log10ofMantissa;
+    if (LimitFloatPrecision <= 6) {
+      // For floating-point precision of 6:
+      //
+      //   Log10ofMantissa =
+      //     -0.50419619f +
+      //       (0.60948995f - 0.10380950f * x) * x;
+      //
+      // error 0.0014886165, which is 6 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0xbdd49a13, dl));
+      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3f1c0789, dl));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+                                    getF32Constant(DAG, 0x3f011300, dl));
+    } else if (LimitFloatPrecision <= 12) {
+      // For floating-point precision of 12:
+      //
+      //   Log10ofMantissa =
+      //     -0.64831180f +
+      //       (0.91751397f +
+      //         (-0.31664806f + 0.47637168e-1f * x) * x) * x;
+      //
+      // error 0.00019228036, which is better than 12 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3d431f31, dl));
+      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3ea21fb2, dl));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3f6ae232, dl));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+                                    getF32Constant(DAG, 0x3f25f7c3, dl));
+    } else { // LimitFloatPrecision <= 18
+      // For floating-point precision of 18:
+      //
+      //   Log10ofMantissa =
+      //     -0.84299375f +
+      //       (1.5327582f +
+      //         (-1.0688956f +
+      //           (0.49102474f +
+      //             (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
+      //
+      // error 0.0000037995730, which is better than 18 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3c5d51ce, dl));
+      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3e00685a, dl));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3efb6798, dl));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3f88d192, dl));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x3fc4316c, dl));
+      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+      Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
+                                    getF32Constant(DAG, 0x3f57ce70, dl));
+    }
+
+    return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
+  }
+
+  // No special expansion.
+  return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
+}
+
+/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
+                          const TargetLowering &TLI) {
+  if (Op.getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
+    return getLimitedPrecisionExp2(Op, dl, DAG);
+
+  // No special expansion.
+  return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
+}
+
+/// expandPow - Lower a pow intrinsic. Handles the special sequences for
+/// limited-precision mode when the base is 10.0f.
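+/// In that case pow(10.0f, y) is rewritten as exp2(y * log2(10)) and reuses
+/// the limited-precision exp2 expansion above.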
+static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, + SelectionDAG &DAG, const TargetLowering &TLI) { + bool IsExp10 = false; + if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) { + APFloat Ten(10.0f); + IsExp10 = LHSC->isExactlyValue(Ten); + } + } + + // TODO: What fast-math-flags should be set on the FMUL node? + if (IsExp10) { + // Put the exponent in the right bit position for later addition to the + // final result: + // + // #define LOG2OF10 3.3219281f + // t0 = Op * LOG2OF10; + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, + getF32Constant(DAG, 0x40549a78, dl)); + return getLimitedPrecisionExp2(t0, dl, DAG); + } + + // No special expansion. + return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); +} + +/// ExpandPowI - Expand a llvm.powi intrinsic. +static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, + SelectionDAG &DAG) { + // If RHS is a constant, we can expand this out to a multiplication tree, + // otherwise we end up lowering to a call to __powidf2 (for example). When + // optimizing for size, we only want to do this if the expansion would produce + // a small number of multiplies, otherwise we do the full expansion. + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { + // Get the exponent as a positive value. + unsigned Val = RHSC->getSExtValue(); + if ((int)Val < 0) Val = -Val; + + // powi(x, 0) -> 1.0 + if (Val == 0) + return DAG.getConstantFP(1.0, DL, LHS.getValueType()); + + const Function &F = DAG.getMachineFunction().getFunction(); + if (!F.hasOptSize() || + // If optimizing for size, don't insert too many multiplies. + // This inserts up to 5 multiplies. + countPopulation(Val) + Log2_32(Val) < 7) { + // We use the simple binary decomposition method to generate the multiply + // sequence. There are more optimal ways to do this (for example, + // powi(x,15) generates one more multiply than it should), but this has + // the benefit of being both really simple and much better than a libcall. + SDValue Res; // Logically starts equal to 1.0 + SDValue CurSquare = LHS; + // TODO: Intrinsics should have fast-math-flags that propagate to these + // nodes. + while (Val) { + if (Val & 1) { + if (Res.getNode()) + Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare); + else + Res = CurSquare; // 1.0*CurSquare. + } + + CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(), + CurSquare, CurSquare); + Val >>= 1; + } + + // If the original was negative, invert the result, producing 1/(x*x*x). + if (RHSC->getSExtValue() < 0) + Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), + DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res); + return Res; + } + } + + // Otherwise, expand to a libcall. + return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); +} + +// getUnderlyingArgRegs - Find underlying registers used for a truncated, +// bitcasted, or split argument. 
Returns a list of <Register, size in bits>.
+static void
+getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,
+                     const SDValue &N) {
+  switch (N.getOpcode()) {
+  case ISD::CopyFromReg: {
+    SDValue Op = N.getOperand(1);
+    Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(),
+                      Op.getValueType().getSizeInBits());
+    return;
+  }
+  case ISD::BITCAST:
+  case ISD::AssertZext:
+  case ISD::AssertSext:
+  case ISD::TRUNCATE:
+    getUnderlyingArgRegs(Regs, N.getOperand(0));
+    return;
+  case ISD::BUILD_PAIR:
+  case ISD::BUILD_VECTOR:
+  case ISD::CONCAT_VECTORS:
+    for (SDValue Op : N->op_values())
+      getUnderlyingArgRegs(Regs, Op);
+    return;
+  default:
+    return;
+  }
+}
+
+/// If the DbgValueInst is a dbg_value of a function argument, create the
+/// corresponding DBG_VALUE machine instruction for it now. At the end of
+/// instruction selection, they will be inserted into the entry BB.
+bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
+    const Value *V, DILocalVariable *Variable, DIExpression *Expr,
+    DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
+  const Argument *Arg = dyn_cast<Argument>(V);
+  if (!Arg)
+    return false;
+
+  if (!IsDbgDeclare) {
+    // ArgDbgValues are hoisted to the beginning of the entry block. So we
+    // should only emit as ArgDbgValue if the dbg.value intrinsic is found in
+    // the entry block.
+    bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
+    if (!IsInEntryBlock)
+      return false;
+
+    // ArgDbgValues are hoisted to the beginning of the entry block. So we
+    // should only emit as ArgDbgValue if the dbg.value intrinsic describes a
+    // variable that also is a param.
+    //
+    // Although, if we are at the top of the entry block already, we can still
+    // emit using ArgDbgValue. This might catch some situations when the
+    // dbg.value refers to an argument that isn't used in the entry block, so
+    // any CopyToReg node would be optimized out and the only way to express
+    // this DBG_VALUE is by using the physical reg (or FI) as done in this
+    // method. In that case we should only emit as ArgDbgValue if the Variable
+    // is an argument to the current function, and the dbg.value intrinsic is
+    // found in the entry block.
+    bool VariableIsFunctionInputArg = Variable->isParameter() &&
+                                      !DL->getInlinedAt();
+    bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
+    if (!IsInPrologue && !VariableIsFunctionInputArg)
+      return false;
+
+    // Here we assume that a function argument at the IR level can only be
+    // used to describe one input parameter at the source level. If, for
+    // example, we have source code like this
+    //
+    //   struct A { long x, y; };
+    //   void foo(struct A a, long b) {
+    //     ...
+    //     b = a.x;
+    //     ...
+    //   }
+    //
+    // and IR like this
+    //
+    //   define void @foo(i32 %a1, i32 %a2, i32 %b) {
+    //   entry:
+    //     call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
+    //     call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
+    //     call void @llvm.dbg.value(metadata i32 %b, "b",
+    //     ...
+    //     call void @llvm.dbg.value(metadata i32 %a1, "b"
+    //     ...
+    //
+    // then the last dbg.value is describing a parameter "b" using a value that
+    // is an argument. But since we have already used %a1 to describe a
+    // parameter, we should not handle that last dbg.value here (that would
+    // result in an incorrect hoisting of the DBG_VALUE to the function entry).
+    // Note that we allow one dbg.value per IR-level argument, to accommodate
+    // the situation with fragments above.
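+    // The DescribedArgs bit vector below records which argument numbers have
+    // already been used to describe a parameter, enforcing that rule.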
+ if (VariableIsFunctionInputArg) { + unsigned ArgNo = Arg->getArgNo(); + if (ArgNo >= FuncInfo.DescribedArgs.size()) + FuncInfo.DescribedArgs.resize(ArgNo + 1, false); + else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo)) + return false; + FuncInfo.DescribedArgs.set(ArgNo); + } + } + + MachineFunction &MF = DAG.getMachineFunction(); + const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); + + bool IsIndirect = false; + Optional<MachineOperand> Op; + // Some arguments' frame index is recorded during argument lowering. + int FI = FuncInfo.getArgumentFrameIndex(Arg); + if (FI != std::numeric_limits<int>::max()) + Op = MachineOperand::CreateFI(FI); + + SmallVector<std::pair<unsigned, unsigned>, 8> ArgRegsAndSizes; + if (!Op && N.getNode()) { + getUnderlyingArgRegs(ArgRegsAndSizes, N); + Register Reg; + if (ArgRegsAndSizes.size() == 1) + Reg = ArgRegsAndSizes.front().first; + + if (Reg && Reg.isVirtual()) { + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + Register PR = RegInfo.getLiveInPhysReg(Reg); + if (PR) + Reg = PR; + } + if (Reg) { + Op = MachineOperand::CreateReg(Reg, false); + IsIndirect = IsDbgDeclare; + } + } + + if (!Op && N.getNode()) { + // Check if frame index is available. + SDValue LCandidate = peekThroughBitcasts(N); + if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(LCandidate.getNode())) + if (FrameIndexSDNode *FINode = + dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) + Op = MachineOperand::CreateFI(FINode->getIndex()); + } + + if (!Op) { + // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg + auto splitMultiRegDbgValue + = [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) { + unsigned Offset = 0; + for (auto RegAndSize : SplitRegs) { + auto FragmentExpr = DIExpression::createFragmentExpression( + Expr, Offset, RegAndSize.second); + if (!FragmentExpr) + continue; + assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?"); + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, + RegAndSize.first, Variable, *FragmentExpr)); + Offset += RegAndSize.second; + } + }; + + // Check if ValueMap has reg number. + DenseMap<const Value *, unsigned>::const_iterator + VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) { + const auto &TLI = DAG.getTargetLoweringInfo(); + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, + V->getType(), getABIRegCopyCC(V)); + if (RFV.occupiesMultipleRegs()) { + splitMultiRegDbgValue(RFV.getRegsAndSizes()); + return true; + } + + Op = MachineOperand::CreateReg(VMI->second, false); + IsIndirect = IsDbgDeclare; + } else if (ArgRegsAndSizes.size() > 1) { + // This was split due to the calling convention, and no virtual register + // mapping exists for the value. + splitMultiRegDbgValue(ArgRegsAndSizes); + return true; + } + } + + if (!Op) + return false; + + assert(Variable->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + IsIndirect = (Op->isReg()) ? IsIndirect : true; + if (IsIndirect) + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, + *Op, Variable, Expr)); + + return true; +} + +/// Return the appropriate SDDbgValue based on N. 
+SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, + DILocalVariable *Variable, + DIExpression *Expr, + const DebugLoc &dl, + unsigned DbgSDNodeOrder) { + if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) { + // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe + // stack slot locations. + // + // Consider "int x = 0; int *px = &x;". There are two kinds of interesting + // debug values here after optimization: + // + // dbg.value(i32* %px, !"int *px", !DIExpression()), and + // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref)) + // + // Both describe the direct values of their associated variables. + return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), + /*IsIndirect*/ false, dl, DbgSDNodeOrder); + } + return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), + /*IsIndirect*/ false, dl, DbgSDNodeOrder); +} + +// VisualStudio defines setjmp as _setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ + !defined(setjmp_undefined_for_msvc) +# pragma push_macro("setjmp") +# undef setjmp +# define setjmp_undefined_for_msvc +#endif + +static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) { + switch (Intrinsic) { + case Intrinsic::smul_fix: + return ISD::SMULFIX; + case Intrinsic::umul_fix: + return ISD::UMULFIX; + default: + llvm_unreachable("Unhandled fixed point intrinsic"); + } +} + +void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I, + const char *FunctionName) { + assert(FunctionName && "FunctionName must not be nullptr"); + SDValue Callee = DAG.getExternalSymbol( + FunctionName, + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); + LowerCallTo(&I, Callee, I.isTailCall()); +} + +/// Lower the call to the specified intrinsic function. +void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, + unsigned Intrinsic) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc sdl = getCurSDLoc(); + DebugLoc dl = getCurDebugLoc(); + SDValue Res; + + switch (Intrinsic) { + default: + // By default, turn this into a target intrinsic node. 
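+    // visitTargetIntrinsic (defined earlier in this file) builds an
+    // INTRINSIC_WO_CHAIN, INTRINSIC_W_CHAIN or INTRINSIC_VOID node, or a
+    // memory intrinsic node when the target reports the intrinsic as one
+    // that touches memory.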
+ visitTargetIntrinsic(I, Intrinsic); + return; + case Intrinsic::vastart: visitVAStart(I); return; + case Intrinsic::vaend: visitVAEnd(I); return; + case Intrinsic::vacopy: visitVACopy(I); return; + case Intrinsic::returnaddress: + setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, + TLI.getPointerTy(DAG.getDataLayout()), + getValue(I.getArgOperand(0)))); + return; + case Intrinsic::addressofreturnaddress: + setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl, + TLI.getPointerTy(DAG.getDataLayout()))); + return; + case Intrinsic::sponentry: + setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl, + TLI.getFrameIndexTy(DAG.getDataLayout()))); + return; + case Intrinsic::frameaddress: + setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, + TLI.getFrameIndexTy(DAG.getDataLayout()), + getValue(I.getArgOperand(0)))); + return; + case Intrinsic::read_register: { + Value *Reg = I.getArgOperand(0); + SDValue Chain = getRoot(); + SDValue RegName = + DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + Res = DAG.getNode(ISD::READ_REGISTER, sdl, + DAG.getVTList(VT, MVT::Other), Chain, RegName); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); + return; + } + case Intrinsic::write_register: { + Value *Reg = I.getArgOperand(0); + Value *RegValue = I.getArgOperand(1); + SDValue Chain = getRoot(); + SDValue RegName = + DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); + DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, + RegName, getValue(RegValue))); + return; + } + case Intrinsic::setjmp: + lowerCallToExternalSymbol(I, &"_setjmp"[!TLI.usesUnderscoreSetJmp()]); + return; + case Intrinsic::longjmp: + lowerCallToExternalSymbol(I, &"_longjmp"[!TLI.usesUnderscoreLongJmp()]); + return; + case Intrinsic::memcpy: { + const auto &MCI = cast<MemCpyInst>(I); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + // @llvm.memcpy defines 0 and 1 to both mean no alignment. + unsigned DstAlign = std::max<unsigned>(MCI.getDestAlignment(), 1); + unsigned SrcAlign = std::max<unsigned>(MCI.getSourceAlignment(), 1); + unsigned Align = MinAlign(DstAlign, SrcAlign); + bool isVol = MCI.isVolatile(); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + // FIXME: Support passing different dest/src alignments to the memcpy DAG + // node. + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + false, isTC, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MC); + return; + } + case Intrinsic::memset: { + const auto &MSI = cast<MemSetInst>(I); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + // @llvm.memset defines 0 and 1 to both mean no alignment. 
+ unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1); + bool isVol = MSI.isVolatile(); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + isTC, MachinePointerInfo(I.getArgOperand(0))); + updateDAGForMaybeTailCall(MS); + return; + } + case Intrinsic::memmove: { + const auto &MMI = cast<MemMoveInst>(I); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + // @llvm.memmove defines 0 and 1 to both mean no alignment. + unsigned DstAlign = std::max<unsigned>(MMI.getDestAlignment(), 1); + unsigned SrcAlign = std::max<unsigned>(MMI.getSourceAlignment(), 1); + unsigned Align = MinAlign(DstAlign, SrcAlign); + bool isVol = MMI.isVolatile(); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + // FIXME: Support passing different dest/src alignments to the memmove DAG + // node. + SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MM); + return; + } + case Intrinsic::memcpy_element_unordered_atomic: { + const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Src = getValue(MI.getRawSource()); + SDValue Length = getValue(MI.getLength()); + + unsigned DstAlign = MI.getDestAlignment(); + unsigned SrcAlign = MI.getSourceAlignment(); + Type *LengthTy = MI.getLength()->getType(); + unsigned ElemSz = MI.getElementSizeInBytes(); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src, + SrcAlign, Length, LengthTy, ElemSz, isTC, + MachinePointerInfo(MI.getRawDest()), + MachinePointerInfo(MI.getRawSource())); + updateDAGForMaybeTailCall(MC); + return; + } + case Intrinsic::memmove_element_unordered_atomic: { + auto &MI = cast<AtomicMemMoveInst>(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Src = getValue(MI.getRawSource()); + SDValue Length = getValue(MI.getLength()); + + unsigned DstAlign = MI.getDestAlignment(); + unsigned SrcAlign = MI.getSourceAlignment(); + Type *LengthTy = MI.getLength()->getType(); + unsigned ElemSz = MI.getElementSizeInBytes(); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src, + SrcAlign, Length, LengthTy, ElemSz, isTC, + MachinePointerInfo(MI.getRawDest()), + MachinePointerInfo(MI.getRawSource())); + updateDAGForMaybeTailCall(MC); + return; + } + case Intrinsic::memset_element_unordered_atomic: { + auto &MI = cast<AtomicMemSetInst>(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Val = getValue(MI.getValue()); + SDValue Length = getValue(MI.getLength()); + + unsigned DstAlign = MI.getDestAlignment(); + Type *LengthTy = MI.getLength()->getType(); + unsigned ElemSz = MI.getElementSizeInBytes(); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length, + LengthTy, ElemSz, isTC, + MachinePointerInfo(MI.getRawDest())); + updateDAGForMaybeTailCall(MC); + return; + } + case Intrinsic::dbg_addr: + case Intrinsic::dbg_declare: { + const auto &DI = cast<DbgVariableIntrinsic>(I); + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); + 
dropDanglingDebugInfo(Variable, Expression); + assert(Variable && "Missing variable"); + + // Check if address has undef value. + const Value *Address = DI.getVariableLocation(); + if (!Address || isa<UndefValue>(Address) || + (Address->use_empty() && !isa<Argument>(Address))) { + LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + return; + } + + bool isParameter = Variable->isParameter() || isa<Argument>(Address); + + // Check if this variable can be described by a frame index, typically + // either as a static alloca or a byval parameter. + int FI = std::numeric_limits<int>::max(); + if (const auto *AI = + dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) { + if (AI->isStaticAlloca()) { + auto I = FuncInfo.StaticAllocaMap.find(AI); + if (I != FuncInfo.StaticAllocaMap.end()) + FI = I->second; + } + } else if (const auto *Arg = dyn_cast<Argument>( + Address->stripInBoundsConstantOffsets())) { + FI = FuncInfo.getArgumentFrameIndex(Arg); + } + + // llvm.dbg.addr is control dependent and always generates indirect + // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in + // the MachineFunction variable table. + if (FI != std::numeric_limits<int>::max()) { + if (Intrinsic == Intrinsic::dbg_addr) { + SDDbgValue *SDV = DAG.getFrameIndexDbgValue( + Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter); + } + return; + } + + SDValue &N = NodeMap[Address]; + if (!N.getNode() && isa<Argument>(Address)) + // Check unused arguments map. + N = UnusedArgNodeMap[Address]; + SDDbgValue *SDV; + if (N.getNode()) { + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + Address = BCI->getOperand(0); + // Parameters are handled specially. + auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); + if (isParameter && FINode) { + // Byval parameter. We have a frame index at this point. + SDV = + DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(), + /*IsIndirect*/ true, dl, SDNodeOrder); + } else if (isa<Argument>(Address)) { + // Address is an argument, so try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N); + return; + } else { + SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), + true, dl, SDNodeOrder); + } + DAG.AddDbgValue(SDV, N.getNode(), isParameter); + } else { + // If Address is an argument then try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, + N)) { + LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + } + } + return; + } + case Intrinsic::dbg_label: { + const DbgLabelInst &DI = cast<DbgLabelInst>(I); + DILabel *Label = DI.getLabel(); + assert(Label && "Missing label"); + + SDDbgLabel *SDV; + SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder); + DAG.AddDbgLabel(SDV); + return; + } + case Intrinsic::dbg_value: { + const DbgValueInst &DI = cast<DbgValueInst>(I); + assert(DI.getVariable() && "Missing variable"); + + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); + dropDanglingDebugInfo(Variable, Expression); + const Value *V = DI.getValue(); + if (!V) + return; + + if (handleDebugValue(V, Variable, Expression, dl, DI.getDebugLoc(), + SDNodeOrder)) + return; + + // TODO: Dangling debug info will eventually either be resolved or produce + // an Undef DBG_VALUE. 
However in the resolution case, a gap may appear + // between the original dbg.value location and its resolved DBG_VALUE, which + // we should ideally fill with an extra Undef DBG_VALUE. + + DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder); + return; + } + + case Intrinsic::eh_typeid_for: { + // Find the type id for the given typeinfo. + GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); + unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV); + Res = DAG.getConstant(TypeID, sdl, MVT::i32); + setValue(&I, Res); + return; + } + + case Intrinsic::eh_return_i32: + case Intrinsic::eh_return_i64: + DAG.getMachineFunction().setCallsEHReturn(true); + DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl, + MVT::Other, + getControlRoot(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return; + case Intrinsic::eh_unwind_init: + DAG.getMachineFunction().setCallsUnwindInit(true); + return; + case Intrinsic::eh_dwarf_cfa: + setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl, + TLI.getPointerTy(DAG.getDataLayout()), + getValue(I.getArgOperand(0)))); + return; + case Intrinsic::eh_sjlj_callsite: { + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); + assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); + assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); + + MMI.setCurrentCallSite(CI->getZExtValue()); + return; + } + case Intrinsic::eh_sjlj_functioncontext: { + // Get and store the index of the function context. + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + AllocaInst *FnCtx = + cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); + int FI = FuncInfo.StaticAllocaMap[FnCtx]; + MFI.setFunctionContextIndex(FI); + return; + } + case Intrinsic::eh_sjlj_setjmp: { + SDValue Ops[2]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getArgOperand(0)); + SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl, + DAG.getVTList(MVT::i32, MVT::Other), Ops); + setValue(&I, Op.getValue(0)); + DAG.setRoot(Op.getValue(1)); + return; + } + case Intrinsic::eh_sjlj_longjmp: + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, + getRoot(), getValue(I.getArgOperand(0)))); + return; + case Intrinsic::eh_sjlj_setup_dispatch: + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, + getRoot())); + return; + case Intrinsic::masked_gather: + visitMaskedGather(I); + return; + case Intrinsic::masked_load: + visitMaskedLoad(I); + return; + case Intrinsic::masked_scatter: + visitMaskedScatter(I); + return; + case Intrinsic::masked_store: + visitMaskedStore(I); + return; + case Intrinsic::masked_expandload: + visitMaskedLoad(I, true /* IsExpanding */); + return; + case Intrinsic::masked_compressstore: + visitMaskedStore(I, true /* IsCompressing */); + return; + case Intrinsic::powi: + setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG)); + return; + case Intrinsic::log: + setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + return; + case Intrinsic::log2: + setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + return; + case Intrinsic::log10: + setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + return; + case Intrinsic::exp: + setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + return; + case Intrinsic::exp2: + setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + return; + case Intrinsic::pow: + setValue(&I, 
expandPow(sdl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG, TLI)); + return; + case Intrinsic::sqrt: + case Intrinsic::fabs: + case Intrinsic::sin: + case Intrinsic::cos: + case Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::nearbyint: + case Intrinsic::round: + case Intrinsic::canonicalize: { + unsigned Opcode; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; + case Intrinsic::fabs: Opcode = ISD::FABS; break; + case Intrinsic::sin: Opcode = ISD::FSIN; break; + case Intrinsic::cos: Opcode = ISD::FCOS; break; + case Intrinsic::floor: Opcode = ISD::FFLOOR; break; + case Intrinsic::ceil: Opcode = ISD::FCEIL; break; + case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; + case Intrinsic::rint: Opcode = ISD::FRINT; break; + case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + case Intrinsic::round: Opcode = ISD::FROUND; break; + case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break; + } + + setValue(&I, DAG.getNode(Opcode, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return; + } + case Intrinsic::lround: + case Intrinsic::llround: + case Intrinsic::lrint: + case Intrinsic::llrint: { + unsigned Opcode; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::lround: Opcode = ISD::LROUND; break; + case Intrinsic::llround: Opcode = ISD::LLROUND; break; + case Intrinsic::lrint: Opcode = ISD::LRINT; break; + case Intrinsic::llrint: Opcode = ISD::LLRINT; break; + } + + EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, DAG.getNode(Opcode, sdl, RetVT, + getValue(I.getArgOperand(0)))); + return; + } + case Intrinsic::minnum: + setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return; + case Intrinsic::maxnum: + setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return; + case Intrinsic::minimum: + setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return; + case Intrinsic::maximum: + setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return; + case Intrinsic::copysign: + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return; + case Intrinsic::fma: + setValue(&I, DAG.getNode(ISD::FMA, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)))); + return; + case Intrinsic::experimental_constrained_fadd: + case Intrinsic::experimental_constrained_fsub: + case Intrinsic::experimental_constrained_fmul: + case Intrinsic::experimental_constrained_fdiv: + case Intrinsic::experimental_constrained_frem: + case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: + case 
Intrinsic::experimental_constrained_sqrt: + case Intrinsic::experimental_constrained_pow: + case Intrinsic::experimental_constrained_powi: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: + case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_nearbyint: + case Intrinsic::experimental_constrained_maxnum: + case Intrinsic::experimental_constrained_minnum: + case Intrinsic::experimental_constrained_ceil: + case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: + case Intrinsic::experimental_constrained_round: + case Intrinsic::experimental_constrained_trunc: + visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); + return; + case Intrinsic::fmuladd: { + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && + TLI.isFMAFasterThanFMulAndFAdd(VT)) { + setValue(&I, DAG.getNode(ISD::FMA, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)))); + } else { + // TODO: Intrinsic calls should have fast-math-flags. + SDValue Mul = DAG.getNode(ISD::FMUL, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1))); + SDValue Add = DAG.getNode(ISD::FADD, sdl, + getValue(I.getArgOperand(0)).getValueType(), + Mul, + getValue(I.getArgOperand(2))); + setValue(&I, Add); + } + return; + } + case Intrinsic::convert_to_fp16: + setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, + DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, + getValue(I.getArgOperand(0)), + DAG.getTargetConstant(0, sdl, + MVT::i32)))); + return; + case Intrinsic::convert_from_fp16: + setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl, + TLI.getValueType(DAG.getDataLayout(), I.getType()), + DAG.getNode(ISD::BITCAST, sdl, MVT::f16, + getValue(I.getArgOperand(0))))); + return; + case Intrinsic::pcmarker: { + SDValue Tmp = getValue(I.getArgOperand(0)); + DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); + return; + } + case Intrinsic::readcyclecounter: { + SDValue Op = getRoot(); + Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl, + DAG.getVTList(MVT::i64, MVT::Other), Op); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); + return; + } + case Intrinsic::bitreverse: + setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return; + case Intrinsic::bswap: + setValue(&I, DAG.getNode(ISD::BSWAP, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return; + case Intrinsic::cttz: { + SDValue Arg = getValue(I.getArgOperand(0)); + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); + EVT Ty = Arg.getValueType(); + setValue(&I, DAG.getNode(CI->isZero() ? 
ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, + sdl, Ty, Arg)); + return; + } + case Intrinsic::ctlz: { + SDValue Arg = getValue(I.getArgOperand(0)); + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); + EVT Ty = Arg.getValueType(); + setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, + sdl, Ty, Arg)); + return; + } + case Intrinsic::ctpop: { + SDValue Arg = getValue(I.getArgOperand(0)); + EVT Ty = Arg.getValueType(); + setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); + return; + } + case Intrinsic::fshl: + case Intrinsic::fshr: { + bool IsFSHL = Intrinsic == Intrinsic::fshl; + SDValue X = getValue(I.getArgOperand(0)); + SDValue Y = getValue(I.getArgOperand(1)); + SDValue Z = getValue(I.getArgOperand(2)); + EVT VT = X.getValueType(); + SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT); + SDValue Zero = DAG.getConstant(0, sdl, VT); + SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC); + + auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR; + if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) { + setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z)); + return; + } + + // When X == Y, this is rotate. If the data type has a power-of-2 size, we + // avoid the select that is necessary in the general case to filter out + // the 0-shift possibility that leads to UB. + if (X == Y && isPowerOf2_32(VT.getScalarSizeInBits())) { + auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR; + if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) { + setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z)); + return; + } + + // Some targets only rotate one way. Try the opposite direction. + RotateOpcode = IsFSHL ? ISD::ROTR : ISD::ROTL; + if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) { + // Negate the shift amount because it is safe to ignore the high bits. + SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z); + setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt)); + return; + } + + // fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW)) + // fshr (rotr): (X << ((0 - Z) % BW)) | (X >> (Z % BW)) + SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z); + SDValue NShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC); + SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt); + SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? NShAmt : ShAmt); + setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY)); + return; + } + + // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) + // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) + SDValue InvShAmt = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, ShAmt); + SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt); + SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt); + SDValue Or = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY); + + // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth, + // and that is undefined. We must compare and select to avoid UB. + EVT CCVT = MVT::i1; + if (VT.isVector()) + CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements()); + + // For fshl, 0-shift returns the 1st arg (X). + // For fshr, 0-shift returns the 2nd arg (Y). + SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ); + setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? 
X : Y, Or)); + return; + } + case Intrinsic::sadd_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2)); + return; + } + case Intrinsic::uadd_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2)); + return; + } + case Intrinsic::ssub_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2)); + return; + } + case Intrinsic::usub_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2)); + return; + } + case Intrinsic::smul_fix: + case Intrinsic::umul_fix: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl, + Op1.getValueType(), Op1, Op2, Op3)); + return; + } + case Intrinsic::smul_fix_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + setValue(&I, DAG.getNode(ISD::SMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2, + Op3)); + return; + } + case Intrinsic::umul_fix_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + setValue(&I, DAG.getNode(ISD::UMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2, + Op3)); + return; + } + case Intrinsic::stacksave: { + SDValue Op = getRoot(); + Res = DAG.getNode( + ISD::STACKSAVE, sdl, + DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); + return; + } + case Intrinsic::stackrestore: + Res = getValue(I.getArgOperand(0)); + DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); + return; + case Intrinsic::get_dynamic_area_offset: { + SDValue Op = getRoot(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); + // Result type for @llvm.get.dynamic.area.offset should match PtrTy for + // target. + if (PtrTy.getSizeInBits() < ResTy.getSizeInBits()) + report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset" + " intrinsic!"); + Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy), + Op); + DAG.setRoot(Op); + setValue(&I, Res); + return; + } + case Intrinsic::stackguard: { + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + MachineFunction &MF = DAG.getMachineFunction(); + const Module &M = *MF.getFunction().getParent(); + SDValue Chain = getRoot(); + if (TLI.useLoadStackGuardNode()) { + Res = getLoadStackGuard(DAG, sdl, Chain); + } else { + const Value *Global = TLI.getSDagStackGuard(M); + unsigned Align = DL->getPrefTypeAlignment(Global->getType()); + Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global), + MachinePointerInfo(Global, 0), Align, + MachineMemOperand::MOVolatile); + } + if (TLI.useStackGuardXorFP()) + Res = TLI.emitStackGuardXorFP(DAG, Res, sdl); + DAG.setRoot(Chain); + setValue(&I, Res); + return; + } + case Intrinsic::stackprotector: { + // Emit code into the DAG to store the stack guard onto the stack. 
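+    // The guard value is loaded (or produced by a target-specific node) and
+    // then spilled to a dedicated stack slot; a later pass emits the compare
+    // against that slot to detect stack smashing.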
+ MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + SDValue Src, Chain = getRoot(); + + if (TLI.useLoadStackGuardNode()) + Src = getLoadStackGuard(DAG, sdl, Chain); + else + Src = getValue(I.getArgOperand(0)); // The guard's value. + + AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); + + int FI = FuncInfo.StaticAllocaMap[Slot]; + MFI.setStackProtectorIndex(FI); + + SDValue FIN = DAG.getFrameIndex(FI, PtrTy); + + // Store the stack protector onto the stack. + Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI), + /* Alignment = */ 0, MachineMemOperand::MOVolatile); + setValue(&I, Res); + DAG.setRoot(Res); + return; + } + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); + + case Intrinsic::is_constant: + llvm_unreachable("llvm.is.constant.* should have been lowered already"); + + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + // Drop the intrinsic, but forward the value + setValue(&I, getValue(I.getOperand(0))); + return; + case Intrinsic::assume: + case Intrinsic::var_annotation: + case Intrinsic::sideeffect: + // Discard annotate attributes, assumptions, and artificial side-effects. + return; + + case Intrinsic::codeview_annotation: { + // Emit a label associated with this metadata. + MachineFunction &MF = DAG.getMachineFunction(); + MCSymbol *Label = + MF.getMMI().getContext().createTempSymbol("annotation", true); + Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata(); + MF.addCodeViewAnnotation(Label, cast<MDNode>(MD)); + Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label); + DAG.setRoot(Res); + return; + } + + case Intrinsic::init_trampoline: { + const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts()); + + SDValue Ops[6]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getArgOperand(0)); + Ops[2] = getValue(I.getArgOperand(1)); + Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); + Ops[5] = DAG.getSrcValue(F); + + Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops); + + DAG.setRoot(Res); + return; + } + case Intrinsic::adjust_trampoline: + setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, + TLI.getPointerTy(DAG.getDataLayout()), + getValue(I.getArgOperand(0)))); + return; + case Intrinsic::gcroot: { + assert(DAG.getMachineFunction().getFunction().hasGC() && + "only valid in functions with gc specified, enforced by Verifier"); + assert(GFI && "implied by previous"); + const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); + const Constant *TypeMap = cast<Constant>(I.getArgOperand(1)); + + FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); + GFI->addStackRoot(FI->getIndex(), TypeMap); + return; + } + case Intrinsic::gcread: + case Intrinsic::gcwrite: + llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); + case Intrinsic::flt_rounds: + setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); + return; + + case Intrinsic::expect: + // Just replace __builtin_expect(exp, c) with EXP. 
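+    // For example, %r = call i64 @llvm.expect.i64(i64 %x, i64 1) lowers to
+    // plain %x; the expected value only guides earlier branch-probability
+    // heuristics, so nothing is left to select here.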
+    setValue(&I, getValue(I.getArgOperand(0)));
+    return;
+
+  case Intrinsic::debugtrap:
+  case Intrinsic::trap: {
+    StringRef TrapFuncName =
+        I.getAttributes()
+            .getAttribute(AttributeList::FunctionIndex, "trap-func-name")
+            .getValueAsString();
+    if (TrapFuncName.empty()) {
+      ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
+        ISD::TRAP : ISD::DEBUGTRAP;
+      DAG.setRoot(DAG.getNode(Op, sdl, MVT::Other, getRoot()));
+      return;
+    }
+    TargetLowering::ArgListTy Args;
+
+    TargetLowering::CallLoweringInfo CLI(DAG);
+    CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
+        CallingConv::C, I.getType(),
+        DAG.getExternalSymbol(TrapFuncName.data(),
+                              TLI.getPointerTy(DAG.getDataLayout())),
+        std::move(Args));
+
+    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+    DAG.setRoot(Result.second);
+    return;
+  }
+
+  case Intrinsic::uadd_with_overflow:
+  case Intrinsic::sadd_with_overflow:
+  case Intrinsic::usub_with_overflow:
+  case Intrinsic::ssub_with_overflow:
+  case Intrinsic::umul_with_overflow:
+  case Intrinsic::smul_with_overflow: {
+    ISD::NodeType Op;
+    switch (Intrinsic) {
+    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
+    case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
+    case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
+    case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
+    case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
+    case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
+    case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
+    }
+    SDValue Op1 = getValue(I.getArgOperand(0));
+    SDValue Op2 = getValue(I.getArgOperand(1));
+
+    EVT ResultVT = Op1.getValueType();
+    EVT OverflowVT = MVT::i1;
+    if (ResultVT.isVector())
+      OverflowVT = EVT::getVectorVT(
+          *Context, OverflowVT, ResultVT.getVectorNumElements());
+
+    SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT);
+    setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
+    return;
+  }
+  case Intrinsic::prefetch: {
+    SDValue Ops[5];
+    unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+    auto Flags = rw == 0 ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
+    Ops[0] = DAG.getRoot();
+    Ops[1] = getValue(I.getArgOperand(0));
+    Ops[2] = getValue(I.getArgOperand(1));
+    Ops[3] = getValue(I.getArgOperand(2));
+    Ops[4] = getValue(I.getArgOperand(3));
+    SDValue Result = DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl,
+                                             DAG.getVTList(MVT::Other), Ops,
+                                             EVT::getIntegerVT(*Context, 8),
+                                             MachinePointerInfo(I.getArgOperand(0)),
+                                             0, /* align */
+                                             Flags);
+
+    // Chain the prefetch in parallel with any pending loads, to stay out of
+    // the way of later optimizations.
+    PendingLoads.push_back(Result);
+    Result = getRoot();
+    DAG.setRoot(Result);
+    return;
+  }
+  case Intrinsic::lifetime_start:
+  case Intrinsic::lifetime_end: {
+    bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
+    // Stack coloring is not enabled in O0, discard region information.
+    if (TM.getOptLevel() == CodeGenOpt::None)
+      return;
+
+    const int64_t ObjectSize =
+        cast<ConstantInt>(I.getArgOperand(0))->getSExtValue();
+    Value *const ObjectPtr = I.getArgOperand(1);
+    SmallVector<const Value *, 4> Allocas;
+    GetUnderlyingObjects(ObjectPtr, Allocas, *DL);
+
+    for (SmallVectorImpl<const Value*>::iterator Object = Allocas.begin(),
+           E = Allocas.end(); Object != E; ++Object) {
+      const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
+
+      // Could not find an Alloca.
+ if (!LifetimeObject) + continue; + + // First check that the Alloca is static, otherwise it won't have a + // valid frame index. + auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject); + if (SI == FuncInfo.StaticAllocaMap.end()) + return; + + const int FrameIndex = SI->second; + int64_t Offset; + if (GetPointerBaseWithConstantOffset( + ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject) + Offset = -1; // Cannot determine offset from alloca to lifetime object. + Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize, + Offset); + DAG.setRoot(Res); + } + return; + } + case Intrinsic::invariant_start: + // Discard region information. + setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); + return; + case Intrinsic::invariant_end: + // Discard region information. + return; + case Intrinsic::clear_cache: + /// FunctionName may be null. + if (const char *FunctionName = TLI.getClearCacheBuiltinName()) + lowerCallToExternalSymbol(I, FunctionName); + return; + case Intrinsic::donothing: + // ignore + return; + case Intrinsic::experimental_stackmap: + visitStackmap(I); + return; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + visitPatchpoint(&I); + return; + case Intrinsic::experimental_gc_statepoint: + LowerStatepoint(ImmutableStatepoint(&I)); + return; + case Intrinsic::experimental_gc_result: + visitGCResult(cast<GCResultInst>(I)); + return; + case Intrinsic::experimental_gc_relocate: + visitGCRelocate(cast<GCRelocateInst>(I)); + return; + case Intrinsic::instrprof_increment: + llvm_unreachable("instrprof failed to lower an increment"); + case Intrinsic::instrprof_value_profile: + llvm_unreachable("instrprof failed to lower a value profiling call"); + case Intrinsic::localescape: { + MachineFunction &MF = DAG.getMachineFunction(); + const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); + + // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission + // is the same on all targets. + for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { + Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); + if (isa<ConstantPointerNull>(Arg)) + continue; // Skip null pointers. They represent a hole in index space. + AllocaInst *Slot = cast<AllocaInst>(Arg); + assert(FuncInfo.StaticAllocaMap.count(Slot) && + "can only escape static allocas"); + int FI = FuncInfo.StaticAllocaMap[Slot]; + MCSymbol *FrameAllocSym = + MF.getMMI().getContext().getOrCreateFrameAllocSymbol( + GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, + TII->get(TargetOpcode::LOCAL_ESCAPE)) + .addSym(FrameAllocSym) + .addFrameIndex(FI); + } + + return; + } + + case Intrinsic::localrecover: { + // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx) + MachineFunction &MF = DAG.getMachineFunction(); + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0); + + // Get the symbol that defines the frame offset. + auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); + auto *Idx = cast<ConstantInt>(I.getArgOperand(2)); + unsigned IdxVal = + unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max())); + MCSymbol *FrameAllocSym = + MF.getMMI().getContext().getOrCreateFrameAllocSymbol( + GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal); + + // Create a MCSymbol for the label to avoid any target lowering + // that would make this PC relative. 
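+    // At the MC layer the symbol resolves to the constant frame offset that
+    // the matching llvm.localescape recorded in the parent function, so the
+    // recovered address is computed below as roughly (illustrative):
+    //   recovered = fp + <offset of escaped alloca>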
+    SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
+    SDValue OffsetVal =
+        DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);
+
+    // Add the offset to the FP.
+    Value *FP = I.getArgOperand(1);
+    SDValue FPVal = getValue(FP);
+    SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal);
+    setValue(&I, Add);
+
+    return;
+  }
+
+  case Intrinsic::eh_exceptionpointer:
+  case Intrinsic::eh_exceptioncode: {
+    // Get the exception pointer vreg, copy from it, and resize it to fit.
+    const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
+    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+    const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
+    unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
+    SDValue N =
+        DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
+    if (Intrinsic == Intrinsic::eh_exceptioncode)
+      N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
+    setValue(&I, N);
+    return;
+  }
+  case Intrinsic::xray_customevent: {
+    // Here we want to make sure that the intrinsic behaves as if it has a
+    // specific calling convention, and only for x86_64.
+    // FIXME: Support other platforms later.
+    const auto &Triple = DAG.getTarget().getTargetTriple();
+    if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+      return;
+
+    SDLoc DL = getCurSDLoc();
+    SmallVector<SDValue, 8> Ops;
+
+    // We want to say that we always want the arguments in registers.
+    SDValue LogEntryVal = getValue(I.getArgOperand(0));
+    SDValue StrSizeVal = getValue(I.getArgOperand(1));
+    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+    SDValue Chain = getRoot();
+    Ops.push_back(LogEntryVal);
+    Ops.push_back(StrSizeVal);
+    Ops.push_back(Chain);
+
+    // We need to enforce the calling convention for the callsite, so that
+    // argument ordering is enforced correctly, and so that register
+    // allocation can see that some registers may be assumed clobbered and
+    // must be preserved across calls to the intrinsic.
+    MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
+                                           DL, NodeTys, Ops);
+    SDValue patchableNode = SDValue(MN, 0);
+    DAG.setRoot(patchableNode);
+    setValue(&I, patchableNode);
+    return;
+  }
+  case Intrinsic::xray_typedevent: {
+    // Here we want to make sure that the intrinsic behaves as if it has a
+    // specific calling convention, and only for x86_64.
+    // FIXME: Support other platforms later.
+    const auto &Triple = DAG.getTarget().getTargetTriple();
+    if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+      return;
+
+    SDLoc DL = getCurSDLoc();
+    SmallVector<SDValue, 8> Ops;
+
+    // We want to say that we always want the arguments in registers.
+    // (It is not entirely clear that manipulating the selection DAG alone
+    // forces callers to pass these arguments in registers rather than on
+    // the stack.)
+    SDValue LogTypeId = getValue(I.getArgOperand(0));
+    SDValue LogEntryVal = getValue(I.getArgOperand(1));
+    SDValue StrSizeVal = getValue(I.getArgOperand(2));
+    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+    SDValue Chain = getRoot();
+    Ops.push_back(LogTypeId);
+    Ops.push_back(LogEntryVal);
+    Ops.push_back(StrSizeVal);
+    Ops.push_back(Chain);
+
+    // We need to enforce the calling convention for the callsite, so that
+    // argument ordering is enforced correctly, and so that register
+    // allocation can see that some registers may be assumed clobbered and
+    // must be preserved across calls to the intrinsic.
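+    // (On x86_64, for instance, the PATCHABLE_TYPED_EVENT_CALL pseudo built
+    // below is expanded late into an XRay sled; the exact expansion, and
+    // therefore the register assignment, is target-defined.)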
+ MachineSDNode *MN = DAG.getMachineNode( + TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops); + SDValue patchableNode = SDValue(MN, 0); + DAG.setRoot(patchableNode); + setValue(&I, patchableNode); + return; + } + case Intrinsic::experimental_deoptimize: + LowerDeoptimizeCall(&I); + return; + + case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::experimental_vector_reduce_v2_fmul: + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + visitVectorReduce(I, Intrinsic); + return; + + case Intrinsic::icall_branch_funnel: { + SmallVector<SDValue, 16> Ops; + Ops.push_back(getValue(I.getArgOperand(0))); + + int64_t Offset; + auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset( + I.getArgOperand(1), Offset, DAG.getDataLayout())); + if (!Base) + report_fatal_error( + "llvm.icall.branch.funnel operand must be a GlobalValue"); + Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0)); + + struct BranchFunnelTarget { + int64_t Offset; + SDValue Target; + }; + SmallVector<BranchFunnelTarget, 8> Targets; + + for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) { + auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset( + I.getArgOperand(Op), Offset, DAG.getDataLayout())); + if (ElemBase != Base) + report_fatal_error("all llvm.icall.branch.funnel operands must refer " + "to the same GlobalValue"); + + SDValue Val = getValue(I.getArgOperand(Op + 1)); + auto *GA = dyn_cast<GlobalAddressSDNode>(Val); + if (!GA) + report_fatal_error( + "llvm.icall.branch.funnel operand must be a GlobalValue"); + Targets.push_back({Offset, DAG.getTargetGlobalAddress( + GA->getGlobal(), getCurSDLoc(), + Val.getValueType(), GA->getOffset())}); + } + llvm::sort(Targets, + [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) { + return T1.Offset < T2.Offset; + }); + + for (auto &T : Targets) { + Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32)); + Ops.push_back(T.Target); + } + + Ops.push_back(DAG.getRoot()); // Chain + SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, + getCurSDLoc(), MVT::Other, Ops), + 0); + DAG.setRoot(N); + setValue(&I, N); + HasTailCall = true; + return; + } + + case Intrinsic::wasm_landingpad_index: + // Information this intrinsic contained has been transferred to + // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely + // delete it now. 
+ return; + + case Intrinsic::aarch64_settag: + case Intrinsic::aarch64_settag_zero: { + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); + bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero; + SDValue Val = TSI.EmitTargetCodeForSetTag( + DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)), + ZeroMemory); + DAG.setRoot(Val); + setValue(&I, Val); + return; + } + case Intrinsic::ptrmask: { + SDValue Ptr = getValue(I.getOperand(0)); + SDValue Const = getValue(I.getOperand(1)); + + EVT DestVT = + EVT(DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); + + setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), DestVT, Ptr, + DAG.getZExtOrTrunc(Const, getCurSDLoc(), DestVT))); + return; + } + } +} + +void SelectionDAGBuilder::visitConstrainedFPIntrinsic( + const ConstrainedFPIntrinsic &FPI) { + SDLoc sdl = getCurSDLoc(); + unsigned Opcode; + switch (FPI.getIntrinsicID()) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::experimental_constrained_fadd: + Opcode = ISD::STRICT_FADD; + break; + case Intrinsic::experimental_constrained_fsub: + Opcode = ISD::STRICT_FSUB; + break; + case Intrinsic::experimental_constrained_fmul: + Opcode = ISD::STRICT_FMUL; + break; + case Intrinsic::experimental_constrained_fdiv: + Opcode = ISD::STRICT_FDIV; + break; + case Intrinsic::experimental_constrained_frem: + Opcode = ISD::STRICT_FREM; + break; + case Intrinsic::experimental_constrained_fma: + Opcode = ISD::STRICT_FMA; + break; + case Intrinsic::experimental_constrained_fptosi: + Opcode = ISD::STRICT_FP_TO_SINT; + break; + case Intrinsic::experimental_constrained_fptoui: + Opcode = ISD::STRICT_FP_TO_UINT; + break; + case Intrinsic::experimental_constrained_fptrunc: + Opcode = ISD::STRICT_FP_ROUND; + break; + case Intrinsic::experimental_constrained_fpext: + Opcode = ISD::STRICT_FP_EXTEND; + break; + case Intrinsic::experimental_constrained_sqrt: + Opcode = ISD::STRICT_FSQRT; + break; + case Intrinsic::experimental_constrained_pow: + Opcode = ISD::STRICT_FPOW; + break; + case Intrinsic::experimental_constrained_powi: + Opcode = ISD::STRICT_FPOWI; + break; + case Intrinsic::experimental_constrained_sin: + Opcode = ISD::STRICT_FSIN; + break; + case Intrinsic::experimental_constrained_cos: + Opcode = ISD::STRICT_FCOS; + break; + case Intrinsic::experimental_constrained_exp: + Opcode = ISD::STRICT_FEXP; + break; + case Intrinsic::experimental_constrained_exp2: + Opcode = ISD::STRICT_FEXP2; + break; + case Intrinsic::experimental_constrained_log: + Opcode = ISD::STRICT_FLOG; + break; + case Intrinsic::experimental_constrained_log10: + Opcode = ISD::STRICT_FLOG10; + break; + case Intrinsic::experimental_constrained_log2: + Opcode = ISD::STRICT_FLOG2; + break; + case Intrinsic::experimental_constrained_lrint: + Opcode = ISD::STRICT_LRINT; + break; + case Intrinsic::experimental_constrained_llrint: + Opcode = ISD::STRICT_LLRINT; + break; + case Intrinsic::experimental_constrained_rint: + Opcode = ISD::STRICT_FRINT; + break; + case Intrinsic::experimental_constrained_nearbyint: + Opcode = ISD::STRICT_FNEARBYINT; + break; + case Intrinsic::experimental_constrained_maxnum: + Opcode = ISD::STRICT_FMAXNUM; + break; + case Intrinsic::experimental_constrained_minnum: + Opcode = ISD::STRICT_FMINNUM; + break; + case Intrinsic::experimental_constrained_ceil: + Opcode = ISD::STRICT_FCEIL; + break; + case Intrinsic::experimental_constrained_floor: + Opcode = ISD::STRICT_FFLOOR; 
+ break; + case Intrinsic::experimental_constrained_lround: + Opcode = ISD::STRICT_LROUND; + break; + case Intrinsic::experimental_constrained_llround: + Opcode = ISD::STRICT_LLROUND; + break; + case Intrinsic::experimental_constrained_round: + Opcode = ISD::STRICT_FROUND; + break; + case Intrinsic::experimental_constrained_trunc: + Opcode = ISD::STRICT_FTRUNC; + break; + } + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Chain = getRoot(); + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); + ValueVTs.push_back(MVT::Other); // Out chain + + SDVTList VTs = DAG.getVTList(ValueVTs); + SDValue Result; + if (Opcode == ISD::STRICT_FP_ROUND) + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)), + DAG.getTargetConstant(0, sdl, + TLI.getPointerTy(DAG.getDataLayout())) }); + else if (FPI.isUnaryOp()) + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)) }); + else if (FPI.isTernaryOp()) + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)), + getValue(FPI.getArgOperand(1)), + getValue(FPI.getArgOperand(2)) }); + else + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)), + getValue(FPI.getArgOperand(1)) }); + + if (FPI.getExceptionBehavior() != + ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) { + SDNodeFlags Flags; + Flags.setFPExcept(true); + Result->setFlags(Flags); + } + + assert(Result.getNode()->getNumValues() == 2); + SDValue OutChain = Result.getValue(1); + DAG.setRoot(OutChain); + SDValue FPResult = Result.getValue(0); + setValue(&FPI, FPResult); +} + +std::pair<SDValue, SDValue> +SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, + const BasicBlock *EHPadBB) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineModuleInfo &MMI = MF.getMMI(); + MCSymbol *BeginLabel = nullptr; + + if (EHPadBB) { + // Insert a label before the invoke call to mark the try range. This can be + // used to detect deletion of the invoke via the MachineModuleInfo. + BeginLabel = MMI.getContext().createTempSymbol(); + + // For SjLj, keep track of which landing pads go with which invokes + // so as to maintain the ordering of pads in the LSDA. + unsigned CallSiteIndex = MMI.getCurrentCallSite(); + if (CallSiteIndex) { + MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); + LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); + + // Now that the call site is handled, stop tracking it. + MMI.setCurrentCallSite(0); + } + + // Both PendingLoads and PendingExports must be flushed here; + // this call might not return. + (void)getRoot(); + DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel)); + + CLI.setChain(getRoot()); + } + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); + + assert((CLI.IsTailCall || Result.second.getNode()) && + "Non-null chain expected with non-tail call!"); + assert((Result.second.getNode() || !Result.first.getNode()) && + "Null value expected with tail call!"); + + if (!Result.second.getNode()) { + // As a special case, a null chain means that a tail call has been emitted + // and the DAG root is already updated. + HasTailCall = true; + + // Since there's no actual continuation from this block, nothing can be + // relying on us setting vregs for them. 
+    PendingExports.clear();
+  } else {
+    DAG.setRoot(Result.second);
+  }
+
+  if (EHPadBB) {
+    // Insert a label at the end of the invoke call to mark the try range. This
+    // can be used to detect deletion of the invoke via the MachineModuleInfo.
+    MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
+    DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
+
+    // Inform MachineModuleInfo of the range.
+    auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+    // Some platforms (e.g. wasm) use funclet-style IR but do not actually use
+    // outlined funclets or their LSDA info style.
+    if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
+      assert(CLI.CS);
+      WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
+      EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
+                                BeginLabel, EndLabel);
+    } else if (!isScopedEHPersonality(Pers)) {
+      MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
+    }
+  }
+
+  return Result;
+}
+
+void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
+                                      bool isTailCall,
+                                      const BasicBlock *EHPadBB) {
+  auto &DL = DAG.getDataLayout();
+  FunctionType *FTy = CS.getFunctionType();
+  Type *RetTy = CS.getType();
+
+  TargetLowering::ArgListTy Args;
+  Args.reserve(CS.arg_size());
+
+  const Value *SwiftErrorVal = nullptr;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // We can't tail call inside a function with a swifterror argument. Lowering
+  // does not support this yet. The value would have to be moved into the
+  // swifterror register before the call.
+  auto *Caller = CS.getInstruction()->getParent()->getParent();
+  if (TLI.supportSwiftError() &&
+      Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+    isTailCall = false;
+
+  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+       i != e; ++i) {
+    TargetLowering::ArgListEntry Entry;
+    const Value *V = *i;
+
+    // Skip empty types.
+    if (V->getType()->isEmptyTy())
+      continue;
+
+    SDValue ArgNode = getValue(V);
+    Entry.Node = ArgNode; Entry.Ty = V->getType();
+
+    Entry.setAttributes(&CS, i - CS.arg_begin());
+
+    // Use the swifterror virtual register as an input to the call.
+    if (Entry.IsSwiftError && TLI.supportSwiftError()) {
+      SwiftErrorVal = V;
+      // Find the virtual register for the actual swifterror argument and use
+      // it in place of the Value.
+      Entry.Node = DAG.getRegister(
+          SwiftError.getOrCreateVRegUseAt(CS.getInstruction(), FuncInfo.MBB, V),
+          EVT(TLI.getPointerTy(DL)));
+    }
+
+    Args.push_back(Entry);
+
+    // If we have an explicit sret argument that is an Instruction (i.e., it
+    // might point to function-local memory), we can't meaningfully tail-call.
+    if (Entry.IsSRet && isa<Instruction>(V))
+      isTailCall = false;
+  }
+
+  // Check if target-independent constraints permit a tail call here.
+  // Target-dependent constraints are checked within TLI->LowerCallTo.
+  if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))
+    isTailCall = false;
+
+  // Disable tail calls if there is a swifterror argument. Targets have not
+  // been updated to support tail calls.
+ if (TLI.supportSwiftError() && SwiftErrorVal) + isTailCall = false; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()) + .setChain(getRoot()) + .setCallee(RetTy, FTy, Callee, std::move(Args), CS) + .setTailCall(isTailCall) + .setConvergent(CS.isConvergent()); + std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); + + if (Result.first.getNode()) { + const Instruction *Inst = CS.getInstruction(); + Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first); + setValue(Inst, Result.first); + } + + // The last element of CLI.InVals has the SDValue for swifterror return. + // Here we copy it to a virtual register and update SwiftErrorMap for + // book-keeping. + if (SwiftErrorVal && TLI.supportSwiftError()) { + // Get the last element of InVals. + SDValue Src = CLI.InVals.back(); + Register VReg = SwiftError.getOrCreateVRegDefAt( + CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal); + SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); + DAG.setRoot(CopyNode); + } +} + +static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, + SelectionDAGBuilder &Builder) { + // Check to see if this load can be trivially constant folded, e.g. if the + // input is from a string literal. + if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { + // Cast pointer to the type we really want to load. + Type *LoadTy = + Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits()); + if (LoadVT.isVector()) + LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements()); + + LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), + PointerType::getUnqual(LoadTy)); + + if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr( + const_cast<Constant *>(LoadInput), LoadTy, *Builder.DL)) + return Builder.getValue(LoadCst); + } + + // Otherwise, we have to emit the load. If the pointer is to unfoldable but + // still constant memory, the input chain can be the entry node. + SDValue Root; + bool ConstantMemory = false; + + // Do not serialize (non-volatile) loads of constant memory with anything. + if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) { + Root = Builder.DAG.getEntryNode(); + ConstantMemory = true; + } else { + // Do not serialize non-volatile loads against each other. + Root = Builder.DAG.getRoot(); + } + + SDValue Ptr = Builder.getValue(PtrVal); + SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, + Ptr, MachinePointerInfo(PtrVal), + /* Alignment = */ 1); + + if (!ConstantMemory) + Builder.PendingLoads.push_back(LoadVal.getValue(1)); + return LoadVal; +} + +/// Record the value for an instruction that produces an integer result, +/// converting the type where necessary. +void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, + SDValue Value, + bool IsSigned) { + EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType(), true); + if (IsSigned) + Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); + else + Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); + setValue(&I, Value); +} + +/// See if we can lower a memcmp call into an optimized form. If so, return +/// true and lower it. Otherwise return false, and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. 
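+///
+/// For example (illustrative), when the result is only compared against zero,
+///   memcmp(p, q, 4) != 0
+/// can be lowered to two i32 loads and a single SETNE; for 8 bytes and up the
+/// load type must additionally pass the hasFastLoadsAndCompare check below.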
+bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { + const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); + const Value *Size = I.getArgOperand(2); + const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); + if (CSize && CSize->getZExtValue() == 0) { + EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType(), true); + setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT)); + return true; + } + + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp( + DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS), + getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, true); + PendingLoads.push_back(Res.second); + return true; + } + + // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 + // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 + if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I)) + return false; + + // If the target has a fast compare for the given size, it will return a + // preferred load type for that size. Require that the load VT is legal and + // that the target supports unaligned loads of that type. Otherwise, return + // INVALID. + auto hasFastLoadsAndCompare = [&](unsigned NumBits) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT LVT = TLI.hasFastEqualityCompare(NumBits); + if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) { + // TODO: Handle 5 byte compare as 4-byte + 1 byte. + // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. + // TODO: Check alignment of src and dest ptrs. + unsigned DstAS = LHS->getType()->getPointerAddressSpace(); + unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); + if (!TLI.isTypeLegal(LVT) || + !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) || + !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS)) + LVT = MVT::INVALID_SIMPLE_VALUE_TYPE; + } + + return LVT; + }; + + // This turns into unaligned loads. We only do this if the target natively + // supports the MVT we'll be loading or if it is small enough (<= 4) that + // we'll only produce a small number of byte loads. + MVT LoadVT; + unsigned NumBitsToCompare = CSize->getZExtValue() * 8; + switch (NumBitsToCompare) { + default: + return false; + case 16: + LoadVT = MVT::i16; + break; + case 32: + LoadVT = MVT::i32; + break; + case 64: + case 128: + case 256: + LoadVT = hasFastLoadsAndCompare(NumBitsToCompare); + break; + } + + if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE) + return false; + + SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this); + SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this); + + // Bitcast to a wide integer type if the loads are vectors. + if (LoadVT.isVector()) { + EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits()); + LoadL = DAG.getBitcast(CmpVT, LoadL); + LoadR = DAG.getBitcast(CmpVT, LoadR); + } + + SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE); + processIntegerCallValue(I, Cmp, false); + return true; +} + +/// See if we can lower a memchr call into an optimized form. If so, return +/// true and lower it. Otherwise return false, and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. 
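+///
+/// The expansion itself is delegated to the target: EmitTargetCodeForMemchr
+/// returns a (result, chain) pair, with a null result when the target has no
+/// specialized lowering (SystemZ, for example, emits an SRST-based search).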
+bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { + const Value *Src = I.getArgOperand(0); + const Value *Char = I.getArgOperand(1); + const Value *Length = I.getArgOperand(2); + + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Src), getValue(Char), getValue(Length), + MachinePointerInfo(Src)); + if (Res.first.getNode()) { + setValue(&I, Res.first); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// See if we can lower a mempcpy call into an optimized form. If so, return +/// true and lower it. Otherwise return false, and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. +bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { + SDValue Dst = getValue(I.getArgOperand(0)); + SDValue Src = getValue(I.getArgOperand(1)); + SDValue Size = getValue(I.getArgOperand(2)); + + unsigned DstAlign = DAG.InferPtrAlignment(Dst); + unsigned SrcAlign = DAG.InferPtrAlignment(Src); + unsigned Align = std::min(DstAlign, SrcAlign); + if (Align == 0) // Alignment of one or both could not be inferred. + Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved. + + bool isVol = false; + SDLoc sdl = getCurSDLoc(); + + // In the mempcpy context we need to pass in a false value for isTailCall + // because the return pointer needs to be adjusted by the size of + // the copied memory. + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol, + false, /*isTailCall=*/false, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + assert(MC.getNode() != nullptr && + "** memcpy should not be lowered as TailCall in mempcpy context **"); + DAG.setRoot(MC); + + // Check if Size needs to be truncated or extended. + Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType()); + + // Adjust return pointer to point just past the last dst byte. + SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(), + Dst, Size); + setValue(&I, DstPlusSize); + return true; +} + +/// See if we can lower a strcpy call into an optimized form. If so, return +/// true and lower it, otherwise return false and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. +bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0), + MachinePointerInfo(Arg1), isStpcpy); + if (Res.first.getNode()) { + setValue(&I, Res.first); + DAG.setRoot(Res.second); + return true; + } + + return false; +} + +/// See if we can lower a strcmp call into an optimized form. If so, return +/// true and lower it, otherwise return false and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. 
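+///
+/// Note that strcmp returns a signed int, so the target-produced value is
+/// passed through processIntegerCallValue with IsSigned=true below; contrast
+/// strlen/strnlen, whose results are treated as unsigned.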
+bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0), + MachinePointerInfo(Arg1)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, true); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// See if we can lower a strlen call into an optimized form. If so, return +/// true and lower it, otherwise return false and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. +bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { + const Value *Arg0 = I.getArgOperand(0); + + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), MachinePointerInfo(Arg0)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, false); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// See if we can lower a strnlen call into an optimized form. If so, return +/// true and lower it, otherwise return false and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. +bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, false); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// See if we can lower a unary floating-point operation into an SDNode with +/// the specified Opcode. If so, return true and lower it, otherwise return +/// false and it will be lowered like a normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. +bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, + unsigned Opcode) { + // We already checked this call's prototype; verify it doesn't modify errno. + if (!I.onlyReadsMemory()) + return false; + + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp)); + return true; +} + +/// See if we can lower a binary floating-point operation into an SDNode with +/// the specified Opcode. If so, return true and lower it. Otherwise return +/// false, and it will be lowered like a normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. +bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, + unsigned Opcode) { + // We already checked this call's prototype; verify it doesn't modify errno. 
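+  // (A libm call that can set errno is observable through memory and will not
+  // be marked readonly, so onlyReadsMemory() is the conservative gate here;
+  // builds using -fno-math-errno typically satisfy it.)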
+ if (!I.onlyReadsMemory()) + return false; + + SDValue Tmp0 = getValue(I.getArgOperand(0)); + SDValue Tmp1 = getValue(I.getArgOperand(1)); + EVT VT = Tmp0.getValueType(); + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1)); + return true; +} + +void SelectionDAGBuilder::visitCall(const CallInst &I) { + // Handle inline assembly differently. + if (isa<InlineAsm>(I.getCalledValue())) { + visitInlineAsm(&I); + return; + } + + if (Function *F = I.getCalledFunction()) { + if (F->isDeclaration()) { + // Is this an LLVM intrinsic or a target-specific intrinsic? + unsigned IID = F->getIntrinsicID(); + if (!IID) + if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) + IID = II->getIntrinsicID(F); + + if (IID) { + visitIntrinsicCall(I, IID); + return; + } + } + + // Check for well-known libc/libm calls. If the function is internal, it + // can't be a library call. Don't do the check if marked as nobuiltin for + // some reason or the call site requires strict floating point semantics. + LibFunc Func; + if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() && + F->hasName() && LibInfo->getLibFunc(*F, Func) && + LibInfo->hasOptimizedCodeGen(Func)) { + switch (Func) { + default: break; + case LibFunc_copysign: + case LibFunc_copysignf: + case LibFunc_copysignl: + // We already checked this call's prototype; verify it doesn't modify + // errno. + if (I.onlyReadsMemory()) { + SDValue LHS = getValue(I.getArgOperand(0)); + SDValue RHS = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(), + LHS.getValueType(), LHS, RHS)); + return; + } + break; + case LibFunc_fabs: + case LibFunc_fabsf: + case LibFunc_fabsl: + if (visitUnaryFloatCall(I, ISD::FABS)) + return; + break; + case LibFunc_fmin: + case LibFunc_fminf: + case LibFunc_fminl: + if (visitBinaryFloatCall(I, ISD::FMINNUM)) + return; + break; + case LibFunc_fmax: + case LibFunc_fmaxf: + case LibFunc_fmaxl: + if (visitBinaryFloatCall(I, ISD::FMAXNUM)) + return; + break; + case LibFunc_sin: + case LibFunc_sinf: + case LibFunc_sinl: + if (visitUnaryFloatCall(I, ISD::FSIN)) + return; + break; + case LibFunc_cos: + case LibFunc_cosf: + case LibFunc_cosl: + if (visitUnaryFloatCall(I, ISD::FCOS)) + return; + break; + case LibFunc_sqrt: + case LibFunc_sqrtf: + case LibFunc_sqrtl: + case LibFunc_sqrt_finite: + case LibFunc_sqrtf_finite: + case LibFunc_sqrtl_finite: + if (visitUnaryFloatCall(I, ISD::FSQRT)) + return; + break; + case LibFunc_floor: + case LibFunc_floorf: + case LibFunc_floorl: + if (visitUnaryFloatCall(I, ISD::FFLOOR)) + return; + break; + case LibFunc_nearbyint: + case LibFunc_nearbyintf: + case LibFunc_nearbyintl: + if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) + return; + break; + case LibFunc_ceil: + case LibFunc_ceilf: + case LibFunc_ceill: + if (visitUnaryFloatCall(I, ISD::FCEIL)) + return; + break; + case LibFunc_rint: + case LibFunc_rintf: + case LibFunc_rintl: + if (visitUnaryFloatCall(I, ISD::FRINT)) + return; + break; + case LibFunc_round: + case LibFunc_roundf: + case LibFunc_roundl: + if (visitUnaryFloatCall(I, ISD::FROUND)) + return; + break; + case LibFunc_trunc: + case LibFunc_truncf: + case LibFunc_truncl: + if (visitUnaryFloatCall(I, ISD::FTRUNC)) + return; + break; + case LibFunc_log2: + case LibFunc_log2f: + case LibFunc_log2l: + if (visitUnaryFloatCall(I, ISD::FLOG2)) + return; + break; + case LibFunc_exp2: + case LibFunc_exp2f: + case LibFunc_exp2l: + if (visitUnaryFloatCall(I, ISD::FEXP2)) + return; + break; + case LibFunc_memcmp: + if (visitMemCmpCall(I)) + 
          return;
+        break;
+      case LibFunc_mempcpy:
+        if (visitMemPCpyCall(I))
+          return;
+        break;
+      case LibFunc_memchr:
+        if (visitMemChrCall(I))
+          return;
+        break;
+      case LibFunc_strcpy:
+        if (visitStrCpyCall(I, false))
+          return;
+        break;
+      case LibFunc_stpcpy:
+        if (visitStrCpyCall(I, true))
+          return;
+        break;
+      case LibFunc_strcmp:
+        if (visitStrCmpCall(I))
+          return;
+        break;
+      case LibFunc_strlen:
+        if (visitStrLenCall(I))
+          return;
+        break;
+      case LibFunc_strnlen:
+        if (visitStrNLenCall(I))
+          return;
+        break;
+      }
+    }
+  }
+
+  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+  // have to do anything here to lower funclet bundles.
+  assert(!I.hasOperandBundlesOtherThan(
+             {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+         "Cannot lower calls with arbitrary operand bundles!");
+
+  SDValue Callee = getValue(I.getCalledValue());
+
+  if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
+    LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
+  else
+    // Check if we can potentially perform a tail call. More detailed checking
+    // is done within LowerCallTo, after more information about the call is
+    // known.
+    LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+namespace {
+
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
+public:
+  /// CallOperand - If this is the result output operand or a clobber
+  /// this is null, otherwise it is the incoming operand to the CallInst.
+  /// This gets modified as the asm is processed.
+  SDValue CallOperand;
+
+  /// AssignedRegs - If this is a register or register class operand, this
+  /// contains the set of registers corresponding to the operand.
+  RegsForValue AssignedRegs;
+
+  explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
+      : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
+  }
+
+  /// Whether or not this operand accesses memory.
+  bool hasMemory(const TargetLowering &TLI) const {
+    // Indirect operand accesses access memory.
+    if (isIndirect)
+      return true;
+
+    for (const auto &Code : Codes)
+      if (TLI.getConstraintType(Code) == TargetLowering::C_Memory)
+        return true;
+
+    return false;
+  }
+
+  /// getCallOperandValEVT - Return the EVT of the Value* that this operand
+  /// corresponds to. If there is no Value* for this operand, it returns
+  /// MVT::Other.
+  EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI,
+                           const DataLayout &DL) const {
+    if (!CallOperandVal) return MVT::Other;
+
+    if (isa<BasicBlock>(CallOperandVal))
+      return TLI.getPointerTy(DL);
+
+    llvm::Type *OpTy = CallOperandVal->getType();
+
+    // FIXME: code duplicated from TargetLowering::ParseConstraints().
+    // If this is an indirect operand, the operand is a pointer to the
+    // accessed type.
+    if (isIndirect) {
+      PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+      if (!PtrTy)
+        report_fatal_error("Indirect operand for inline asm not a pointer!");
+      OpTy = PtrTy->getElementType();
+    }
+
+    // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+    if (StructType *STy = dyn_cast<StructType>(OpTy))
+      if (STy->getNumElements() == 1)
+        OpTy = STy->getElementType(0);
+
+    // If OpTy is not a single value, it may be a struct/union that we
+    // can tile with integers.
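+    // For example (illustrative), an operand of type
+    //   struct S { short a, b; };   // 32 bits in total
+    // is treated as i32 by the tiling below, since 32 is one of the
+    // supported widths.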
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) { + unsigned BitSize = DL.getTypeSizeInBits(OpTy); + switch (BitSize) { + default: break; + case 1: + case 8: + case 16: + case 32: + case 64: + case 128: + OpTy = IntegerType::get(Context, BitSize); + break; + } + } + + return TLI.getValueType(DL, OpTy, true); + } +}; + +using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>; + +} // end anonymous namespace + +/// Make sure that the output operand \p OpInfo and its corresponding input +/// operand \p MatchingOpInfo have compatible constraint types (otherwise error +/// out). +static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo, + SDISelAsmOperandInfo &MatchingOpInfo, + SelectionDAG &DAG) { + if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT) + return; + + const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + const auto &TLI = DAG.getTargetLoweringInfo(); + + std::pair<unsigned, const TargetRegisterClass *> MatchRC = + TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> InputRC = + TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode, + MatchingOpInfo.ConstraintVT); + if ((OpInfo.ConstraintVT.isInteger() != + MatchingOpInfo.ConstraintVT.isInteger()) || + (MatchRC.second != InputRC.second)) { + // FIXME: error out in a more elegant fashion + report_fatal_error("Unsupported asm: input constraint" + " with a matching output constraint of" + " incompatible type!"); + } + MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT; +} + +/// Get a direct memory input to behave well as an indirect operand. +/// This may introduce stores, hence the need for a \p Chain. +/// \return The (possibly updated) chain. +static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, + SDISelAsmOperandInfo &OpInfo, + SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // If we don't have an indirect input, put it in the constpool if we can, + // otherwise spill it to a stack slot. + // TODO: This isn't quite right. We need to handle these according to + // the addressing mode that the constraint wants. Also, this may take + // an additional register for the computation and we don't want that + // either. + + // If the operand is a float, integer, or vector constant, spill to a + // constant pool entry to get its address. + const Value *OpVal = OpInfo.CallOperandVal; + if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || + isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { + OpInfo.CallOperand = DAG.getConstantPool( + cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout())); + return Chain; + } + + // Otherwise, create a stack slot and emit a store to it before the asm. + Type *Ty = OpVal->getType(); + auto &DL = DAG.getDataLayout(); + uint64_t TySize = DL.getTypeAllocSize(Ty); + unsigned Align = DL.getPrefTypeAlignment(Ty); + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL)); + Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(MF, SSFI), + TLI.getMemValueType(DL, Ty)); + OpInfo.CallOperand = StackSlot; + + return Chain; +} + +/// GetRegistersForValue - Assign registers (virtual or physical) for the +/// specified operand. 
We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process. However, if the asm
+/// uses features that we can't model on machineinstrs, we have SDISel do the
+/// allocation. This produces generally horrible, but correct, code.
+///
+///   OpInfo describes the operand
+///   RefOpInfo describes the matching operand if any, the operand otherwise
+static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
+                                 SDISelAsmOperandInfo &OpInfo,
+                                 SDISelAsmOperandInfo &RefOpInfo) {
+  LLVMContext &Context = *DAG.getContext();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  SmallVector<unsigned, 4> Regs;
+  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+  // No work to do for memory operations.
+  if (OpInfo.ConstraintType == TargetLowering::C_Memory)
+    return;
+
+  // If this is a constraint for a single physreg, or a constraint for a
+  // register class, find it.
+  unsigned AssignedReg;
+  const TargetRegisterClass *RC;
+  std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint(
+      &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
+  // RC is unset only on failure. Return immediately.
+  if (!RC)
+    return;
+
+  // Get the actual register value type. This is important, because the user
+  // may have asked for (e.g.) the AX register in i32 type. We need to
+  // remember that AX is actually i16 to get the right extension.
+  const MVT RegVT = *TRI.legalclasstypes_begin(*RC);
+
+  if (OpInfo.ConstraintVT != MVT::Other) {
+    // If this is an FP operand in an integer register (or vice versa), or more
+    // generally if the operand value disagrees with the register class we plan
+    // to stick it in, fix the operand type.
+    //
+    // If this is an input value, the bitcast to the new type is done now.
+    // Bitcast for output value is done at the end of visitInlineAsm().
+    if ((OpInfo.Type == InlineAsm::isOutput ||
+         OpInfo.Type == InlineAsm::isInput) &&
+        !TRI.isTypeLegalForClass(*RC, OpInfo.ConstraintVT)) {
+      // Try to convert to the first EVT that the reg class contains. If the
+      // types are identical size, use a bitcast to convert (e.g. two differing
+      // vector types). Note: output bitcast is done at the end of
+      // visitInlineAsm().
+      if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+        // Exclude indirect inputs while they are unsupported because the code
+        // to perform the load is missing and thus OpInfo.CallOperand still
+        // refers to the input address rather than the pointed-to value.
+        if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
+          OpInfo.CallOperand =
+              DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
+        OpInfo.ConstraintVT = RegVT;
+        // If the operand is an FP value and we want it in integer registers,
+        // use the corresponding integer type. This turns an f64 value into
+        // i64, which can be passed with two i32 values on a 32-bit machine.
+      } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+        MVT VT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
+        if (OpInfo.Type == InlineAsm::isInput)
+          OpInfo.CallOperand =
+              DAG.getNode(ISD::BITCAST, DL, VT, OpInfo.CallOperand);
+        OpInfo.ConstraintVT = VT;
+      }
+    }
+  }
+
+  // No need to allocate a matching input constraint since the constraint it's
+  // matching to has already been allocated.
+  if (OpInfo.isMatchingInputConstraint())
+    return;
+
+  EVT ValueVT = OpInfo.ConstraintVT;
+  if (OpInfo.ConstraintVT == MVT::Other)
+    ValueVT = RegVT;
+
+  // Initialize NumRegs.
+  unsigned NumRegs = 1;
+  if (OpInfo.ConstraintVT != MVT::Other)
+    NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
+
+  // If this is a constraint for a specific physical register, like {r17},
+  // assign it now.
+
+  // If this is associated to a specific register, initialize the iterator to
+  // the correct place. If virtual, make sure we have enough registers.
+
+  // Initialize the iterator if necessary.
+  TargetRegisterClass::iterator I = RC->begin();
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+  // Do not check for single registers.
+  if (AssignedReg) {
+    for (; *I != AssignedReg; ++I)
+      assert(I != RC->end() && "AssignedReg should be member of RC");
+  }
+
+  for (; NumRegs; --NumRegs, ++I) {
+    assert(I != RC->end() && "Ran out of registers to allocate!");
+    Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC);
+    Regs.push_back(R);
+  }
+
+  OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+}
+
+static unsigned
+findMatchingInlineAsmOperand(unsigned OperandNo,
+                             const std::vector<SDValue> &AsmNodeOperands) {
+  // Scan until we find the definition we already emitted of this operand.
+  unsigned CurOp = InlineAsm::Op_FirstOperand;
+  for (; OperandNo; --OperandNo) {
+    // Advance to the next operand.
+    unsigned OpFlag =
+        cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+    assert((InlineAsm::isRegDefKind(OpFlag) ||
+            InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
+            InlineAsm::isMemKind(OpFlag)) &&
+           "Skipped past definitions?");
+    CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1;
+  }
+  return CurOp;
+}
+
+namespace {
+
+class ExtraFlags {
+  unsigned Flags = 0;
+
+public:
+  explicit ExtraFlags(ImmutableCallSite CS) {
+    const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+    if (IA->hasSideEffects())
+      Flags |= InlineAsm::Extra_HasSideEffects;
+    if (IA->isAlignStack())
+      Flags |= InlineAsm::Extra_IsAlignStack;
+    if (CS.isConvergent())
+      Flags |= InlineAsm::Extra_IsConvergent;
+    Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+  }
+
+  void update(const TargetLowering::AsmOperandInfo &OpInfo) {
+    // Ideally, we would only check against memory constraints. However, the
+    // meaning of an Other constraint can be target-specific and we can't
+    // easily reason about it. Therefore, be conservative and set
+    // MayLoad/MayStore for Other constraints as well.
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+        OpInfo.ConstraintType == TargetLowering::C_Other) {
+      if (OpInfo.Type == InlineAsm::isInput)
+        Flags |= InlineAsm::Extra_MayLoad;
+      else if (OpInfo.Type == InlineAsm::isOutput)
+        Flags |= InlineAsm::Extra_MayStore;
+      else if (OpInfo.Type == InlineAsm::isClobber)
+        Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
+    }
+  }
+
+  unsigned get() const { return Flags; }
+};
+
+} // end anonymous namespace
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
+  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+  /// ConstraintOperands - Information about all of the constraints.
+  SDISelAsmOperandInfoVector ConstraintOperands;
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
+      DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS);
+
+  // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack,
+  // AsmDialect, MayLoad, MayStore).
+  bool HasSideEffect = IA->hasSideEffects();
+  ExtraFlags ExtraInfo(CS);
+
+  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
+  unsigned ResNo = 0;   // ResNo - The result number of the next output.
+  for (auto &T : TargetConstraints) {
+    ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+    // Compute the value type for each operand.
+    if (OpInfo.Type == InlineAsm::isInput ||
+        (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) {
+      OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+
+      // Process the call argument. BasicBlocks are labels, currently appearing
+      // only in asm's.
+      const Instruction *I = CS.getInstruction();
+      if (isa<CallBrInst>(I) &&
+          (ArgNo - 1) >= (cast<CallBrInst>(I)->getNumArgOperands() -
+                          cast<CallBrInst>(I)->getNumIndirectDests())) {
+        const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);
+        EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);
+        OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT);
+      } else if (const auto *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+        OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+      } else {
+        OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+      }
+
+      OpInfo.ConstraintVT =
+          OpInfo
+              .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout())
+              .getSimpleVT();
+    } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
+      // The return value of the call is this value. As such, there is no
+      // corresponding argument.
+      assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+      if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
+        OpInfo.ConstraintVT = TLI.getSimpleValueType(
+            DAG.getDataLayout(), STy->getElementType(ResNo));
+      } else {
+        assert(ResNo == 0 && "Asm only has one result!");
+        OpInfo.ConstraintVT =
+            TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
+      }
+      ++ResNo;
+    } else {
+      OpInfo.ConstraintVT = MVT::Other;
+    }
+
+    if (!HasSideEffect)
+      HasSideEffect = OpInfo.hasMemory(TLI);
+
+    // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
+    // FIXME: Could we compute this on OpInfo rather than T?
+
+    // Compute the constraint code and ConstraintType to use.
+    TLI.ComputeConstraintToUse(T, SDValue());
+
+    if (T.ConstraintType == TargetLowering::C_Immediate &&
+        OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
+      // We've delayed emitting a diagnostic like the "n" constraint because
+      // inlining could cause an integer to show up.
+      return emitInlineAsmError(
+          CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an "
+              "integer constant expression");
+
+    ExtraInfo.update(T);
+  }
+
+  // We won't need to flush pending loads if this asm doesn't touch
+  // memory and is nonvolatile.
+  SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
+
+  bool IsCallBr = isa<CallBrInst>(CS.getInstruction());
+  if (IsCallBr) {
+    // If this is a callbr we need to flush pending exports since inlineasm_br
+    // is a terminator. We need to do this before nodes are glued to
+    // the inlineasm_br node.
+    Chain = getControlRoot();
+  }
+
+  // Second pass over the constraints: compute which constraint option to use.
+  for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+    // If this is an output operand with a matching input operand, look up the
+    // matching input. If their types mismatch, e.g. one is an integer, the
+    // other is floating point, or their sizes are different, flag it as an
+    // error.
+    if (OpInfo.hasMatchingInput()) {
+      SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+      patchMatchingInput(OpInfo, Input, DAG);
+    }
+
+    // Compute the constraint code and ConstraintType to use.
+    TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+        OpInfo.Type == InlineAsm::isClobber)
+      continue;
+
+    // If this is a memory input, and if the operand is not indirect, do what
+    // we need to provide an address for the memory input.
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+        !OpInfo.isIndirect) {
+      assert((OpInfo.isMultipleAlternative ||
+              (OpInfo.Type == InlineAsm::isInput)) &&
+             "Can only indirectify direct input operands!");
+
+      // Memory operands really want the address of the value.
+      Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG);
+
+      // There is no longer a Value* corresponding to this operand.
+      OpInfo.CallOperandVal = nullptr;
+
+      // It is now an indirect operand.
+      OpInfo.isIndirect = true;
+    }
+  }
+
+  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+  std::vector<SDValue> AsmNodeOperands;
+  AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
+  AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
+      IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout())));
+
+  // If we have a !srcloc metadata node associated with it, we want to attach
+  // this to the ultimately generated inline asm machineinstr. To do this, we
+  // pass in the third operand as this (potentially null) inline asm MDNode.
+  const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
+  AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
+
+  // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
+  // bits as operand 3.
+  AsmNodeOperands.push_back(DAG.getTargetConstant(
+      ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
+
+  // Third pass: Loop over operands to prepare DAG-level operands. As part of
+  // this, assign virtual and physical registers for inputs and outputs.
+  for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+    // Assign Registers.
+    SDISelAsmOperandInfo &RefOpInfo =
+        OpInfo.isMatchingInputConstraint()
+            ? ConstraintOperands[OpInfo.getMatchedOperand()]
+            : OpInfo;
+    GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput:
+      if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+          ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+            OpInfo.ConstraintType == TargetLowering::C_Other) &&
+           OpInfo.isIndirect)) {
+        unsigned ConstraintID =
+            TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+        assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+               "Failed to convert memory constraint code to constraint id.");
+
+        // Add information to the INLINEASM node to know about this output.
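+        // The flag word packs the operand kind together with its operand
+        // count, and getFlagWordForMem then adds the memory constraint ID,
+        // roughly (illustrative layout):
+        //   ConstraintID | NumOps=1 | Kind_Mem
+        // so later passes can decode the operand list without the IR.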
+        unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+        OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
+                                                        MVT::i32));
+        AsmNodeOperands.push_back(OpInfo.CallOperand);
+        break;
+      } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+                   OpInfo.ConstraintType == TargetLowering::C_Other) &&
+                  !OpInfo.isIndirect) ||
+                 OpInfo.ConstraintType == TargetLowering::C_Register ||
+                 OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
+        // Otherwise, this outputs to a register (directly for C_Register /
+        // C_RegisterClass, and a target-defined fashion for
+        // C_Immediate/C_Other). Find a register that we can use.
+        if (OpInfo.AssignedRegs.Regs.empty()) {
+          emitInlineAsmError(
+              CS, "couldn't allocate output register for constraint '" +
+                      Twine(OpInfo.ConstraintCode) + "'");
+          return;
+        }
+
+        // Add information to the INLINEASM node to know that this register is
+        // set.
+        OpInfo.AssignedRegs.AddInlineAsmOperands(
+            OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
+                                  : InlineAsm::Kind_RegDef,
+            false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
+      }
+      break;
+
+    case InlineAsm::isInput: {
+      SDValue InOperandVal = OpInfo.CallOperand;
+
+      if (OpInfo.isMatchingInputConstraint()) {
+        // If this is required to match an output register we have already set,
+        // just use its register.
+        auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(),
+                                                  AsmNodeOperands);
+        unsigned OpFlag =
+            cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+        if (InlineAsm::isRegDefKind(OpFlag) ||
+            InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
+          // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+          if (OpInfo.isIndirect) {
+            // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
+            emitInlineAsmError(CS, "inline asm not supported yet:"
+                                   " don't know how to handle tied "
+                                   "indirect register inputs");
+            return;
+          }
+
+          MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
+          SmallVector<unsigned, 4> Regs;
+
+          if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) {
+            unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
+            MachineRegisterInfo &RegInfo =
+                DAG.getMachineFunction().getRegInfo();
+            for (unsigned i = 0; i != NumRegs; ++i)
+              Regs.push_back(RegInfo.createVirtualRegister(RC));
+          } else {
+            emitInlineAsmError(CS, "inline asm error: This value type register "
+                                   "class is not natively supported!");
+            return;
+          }
+
+          RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
+
+          SDLoc dl = getCurSDLoc();
+          // Use the produced MatchedRegs object to copy the input value into
+          // the newly created virtual registers.
+          MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
+                                    CS.getInstruction());
+          MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
+                                           true, OpInfo.getMatchedOperand(), dl,
+                                           DAG, AsmNodeOperands);
+          break;
+        }
+
+        assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
+        assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
+               "Unexpected number of operands");
+        // Add information to the INLINEASM node to know about this input.
+        // See InlineAsm.h isUseOperandTiedToDef.
+        OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
+        OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
+                                                     OpInfo.getMatchedOperand());
+        AsmNodeOperands.push_back(DAG.getTargetConstant(
+            OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
+        AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+        break;
+      }
+
+      // Treat indirect 'X' constraint as memory.
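+      // For example (illustrative only), a constraint of "*X" marks the
+      // operand as indirect ('*'), so the asm consumes the operand's address
+      // rather than its value; such operands are reclassified as memory
+      // below.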
+ if ((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && + OpInfo.isIndirect) + OpInfo.ConstraintType = TargetLowering::C_Memory; + + if (OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) { + std::vector<SDValue> Ops; + TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, + Ops, DAG); + if (Ops.empty()) { + if (OpInfo.ConstraintType == TargetLowering::C_Immediate) + if (isa<ConstantSDNode>(InOperandVal)) { + emitInlineAsmError(CS, "value out of range for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; + } + + emitInlineAsmError(CS, "invalid operand for inline asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; + } + + // Add information to the INLINEASM node to know about this input. + unsigned ResOpType = + InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); + AsmNodeOperands.push_back(DAG.getTargetConstant( + ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); + AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); + break; + } + + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { + assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); + assert(InOperandVal.getValueType() == + TLI.getPointerTy(DAG.getDataLayout()) && + "Memory operands expect pointer values"); + + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + + // Add information to the INLINEASM node to know about this input. + unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); + AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + getCurSDLoc(), + MVT::i32)); + AsmNodeOperands.push_back(InOperandVal); + break; + } + + assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || + OpInfo.ConstraintType == TargetLowering::C_Register || + OpInfo.ConstraintType == TargetLowering::C_Immediate) && + "Unknown constraint type!"); + + // TODO: Support this. + if (OpInfo.isIndirect) { + emitInlineAsmError( + CS, "Don't know how to handle indirect register inputs yet " + "for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; + } + + // Copy the input into the appropriate registers. + if (OpInfo.AssignedRegs.Regs.empty()) { + emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; + } + + SDLoc dl = getCurSDLoc(); + + OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, + Chain, &Flag, CS.getInstruction()); + + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, + dl, DAG, AsmNodeOperands); + break; + } + case InlineAsm::isClobber: + // Add the clobbered value to the operand list, so that the register + // allocator is aware that the physreg got clobbered. + if (!OpInfo.AssignedRegs.Regs.empty()) + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, + false, 0, getCurSDLoc(), DAG, + AsmNodeOperands); + break; + } + } + + // Finish up input operands. Set the input chain and add the flag last. + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; + if (Flag.getNode()) AsmNodeOperands.push_back(Flag); + + unsigned ISDOpc = IsCallBr ? 
ISD::INLINEASM_BR : ISD::INLINEASM;
+  Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
+                      DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
+  Flag = Chain.getValue(1);
+
+  // Do additional work to generate outputs.
+
+  SmallVector<EVT, 1> ResultVTs;
+  SmallVector<SDValue, 1> ResultValues;
+  SmallVector<SDValue, 8> OutChains;
+
+  llvm::Type *CSResultType = CS.getType();
+  ArrayRef<Type *> ResultTypes;
+  if (StructType *StructResult = dyn_cast<StructType>(CSResultType))
+    ResultTypes = StructResult->elements();
+  else if (!CSResultType->isVoidTy())
+    ResultTypes = makeArrayRef(CSResultType);
+
+  auto CurResultType = ResultTypes.begin();
+  auto handleRegAssign = [&](SDValue V) {
+    assert(CurResultType != ResultTypes.end() && "Unexpected value");
+    assert((*CurResultType)->isSized() && "Unexpected unsized type");
+    EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType);
+    ++CurResultType;
+    // If the type of the inline asm call site return value is different but
+    // has the same size as the type of the asm output, bitcast it. One
+    // example of this is for vectors with different width / number of
+    // elements. This can happen for register classes that can contain
+    // multiple different value types. The preg or vreg allocated may not
+    // have the same VT as was expected.
+    //
+    // This can also happen for a return value that disagrees with the
+    // register class it is put in, eg. a double in a general-purpose register
+    // on a 32-bit machine.
+    if (ResultVT != V.getValueType() &&
+        ResultVT.getSizeInBits() == V.getValueSizeInBits())
+      V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V);
+    else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
+             V.getValueType().isInteger()) {
+      // If a result value was tied to an input value, the computed result
+      // may have a wider width than the expected result. Extract the
+      // relevant portion.
+      V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V);
+    }
+    assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
+    ResultVTs.push_back(ResultVT);
+    ResultValues.push_back(V);
+  };
+
+  // Deal with output operands.
+  for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+    if (OpInfo.Type == InlineAsm::isOutput) {
+      SDValue Val;
+      // Skip trivial output operands.
+      if (OpInfo.AssignedRegs.Regs.empty())
+        continue;
+
+      switch (OpInfo.ConstraintType) {
+      case TargetLowering::C_Register:
+      case TargetLowering::C_RegisterClass:
+        Val = OpInfo.AssignedRegs.getCopyFromRegs(
+            DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
+        break;
+      case TargetLowering::C_Immediate:
+      case TargetLowering::C_Other:
+        Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
+                                              OpInfo, DAG);
+        break;
+      case TargetLowering::C_Memory:
+        break; // Already handled.
+      case TargetLowering::C_Unknown:
+        assert(false && "Unexpected unknown constraint");
+      }
+
+      // Indirect outputs manifest as stores. Record output chains.
+      if (OpInfo.isIndirect) {
+        const Value *Ptr = OpInfo.CallOperandVal;
+        assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
+        SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr),
+                                     MachinePointerInfo(Ptr));
+        OutChains.push_back(Store);
+      } else {
+        // Generate CopyFromRegs to associated registers.
+        assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+        if (Val.getOpcode() == ISD::MERGE_VALUES) {
+          for (const SDValue &V : Val->op_values())
+            handleRegAssign(V);
+        } else
+          handleRegAssign(Val);
+      }
+    }
+  }
+
+  // Set results.
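+  // For example (illustrative IR only), an asm with two register outputs:
+  //   %r = call { i32, i32 } asm "...", "=r,=r"()
+  // produces two values in ResultValues that are combined into a single
+  // MERGE_VALUES node below.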
+ if (!ResultValues.empty()) { + assert(CurResultType == ResultTypes.end() && + "Mismatch in number of ResultTypes"); + assert(ResultValues.size() == ResultTypes.size() && + "Mismatch in number of output operands in asm result"); + + SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + DAG.getVTList(ResultVTs), ResultValues); + setValue(CS.getInstruction(), V); + } + + // Collect store chains. + if (!OutChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains); + + // Only Update Root if inline assembly has a memory effect. + if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr) + DAG.setRoot(Chain); +} + +void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS, + const Twine &Message) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), Message); + + // Make sure we leave the DAG in a valid state + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 1> ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); + + if (ValueVTs.empty()) + return; + + SmallVector<SDValue, 1> Ops; + for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i) + Ops.push_back(DAG.getUNDEF(ValueVTs[i])); + + setValue(CS.getInstruction(), DAG.getMergeValues(Ops, getCurSDLoc())); +} + +void SelectionDAGBuilder::visitVAStart(const CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(), + MVT::Other, getRoot(), + getValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(0)))); +} + +void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + SDValue V = DAG.getVAArg( + TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(), + getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), + DL.getABITypeAlignment(I.getType())); + DAG.setRoot(V.getValue(1)); + + if (I.getType()->isPointerTy()) + V = DAG.getPtrExtOrTrunc( + V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType())); + setValue(&I, V); +} + +void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(), + MVT::Other, getRoot(), + getValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(0)))); +} + +void SelectionDAGBuilder::visitVACopy(const CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(), + MVT::Other, getRoot(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + DAG.getSrcValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(1)))); +} + +SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, + const Instruction &I, + SDValue Op) { + const MDNode *Range = I.getMetadata(LLVMContext::MD_range); + if (!Range) + return Op; + + ConstantRange CR = getConstantRangeFromMetadata(*Range); + if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped()) + return Op; + + APInt Lo = CR.getUnsignedMin(); + if (!Lo.isMinValue()) + return Op; + + APInt Hi = CR.getUnsignedMax(); + unsigned Bits = std::max(Hi.getActiveBits(), + static_cast<unsigned>(IntegerType::MIN_INT_BITS)); + + EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits); + + SDLoc SL = getCurSDLoc(); + + SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op, + DAG.getValueType(SmallVT)); + unsigned NumVals = Op.getNode()->getNumValues(); + if (NumVals == 1) + return ZExt; + + SmallVector<SDValue, 4> Ops; + + Ops.push_back(ZExt); + for (unsigned I = 1; I != NumVals; ++I) + 
Ops.push_back(Op.getValue(I));
+
+  return DAG.getMergeValues(Ops, SL);
+}
+
+/// Populate a CallLoweringInfo (into \p CLI) based on the properties of
+/// the call being lowered.
+///
+/// This is a helper for lowering intrinsics that follow a target calling
+/// convention or require stack pointer adjustment. Only a subset of the
+/// intrinsic's operands need to participate in the calling convention.
+void SelectionDAGBuilder::populateCallLoweringInfo(
+    TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
+    unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
+    bool IsPatchPoint) {
+  TargetLowering::ArgListTy Args;
+  Args.reserve(NumArgs);
+
+  // Populate the argument list.
+  // Attributes for args start at offset 1, after the return attribute.
+  for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
+       ArgI != ArgE; ++ArgI) {
+    const Value *V = Call->getOperand(ArgI);
+
+    assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+    TargetLowering::ArgListEntry Entry;
+    Entry.Node = getValue(V);
+    Entry.Ty = V->getType();
+    Entry.setAttributes(Call, ArgI);
+    Args.push_back(Entry);
+  }
+
+  CLI.setDebugLoc(getCurSDLoc())
+      .setChain(getRoot())
+      .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
+      .setDiscardResult(Call->use_empty())
+      .setIsPatchPoint(IsPatchPoint);
+}
+
+/// Add a stack map intrinsic call's live variable operands to a stackmap
+/// or patchpoint target node's operand list.
+///
+/// Constants are converted to TargetConstants purely as an optimization to
+/// avoid constant materialization and register allocation.
+///
+/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does
+/// not generate address computation nodes, and so FinalizeISel can convert
+/// the TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
+/// address materialization and register allocation, but may also be required
+/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
+/// alloca in the entry block, then the runtime may assume that the alloca's
+/// StackMap location can be read immediately after compilation and that the
+/// location is valid at any point during execution (this is similar to the
+/// assumption made by the llvm.gcroot intrinsic). If the alloca's location
+/// were only available in a register, then the runtime would need to trap
+/// when execution reaches the StackMap in order to read the alloca's
+/// location.
+static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
+                                const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
+                                SelectionDAGBuilder &Builder) {
+  for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) {
+    SDValue OpVal = Builder.getValue(CS.getArgument(i));
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
+      Ops.push_back(
+          Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
+      Ops.push_back(
+          Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64));
+    } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
+      const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
+      Ops.push_back(Builder.DAG.getTargetFrameIndex(
+          FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout())));
+    } else
+      Ops.push_back(OpVal);
+  }
+}
+
+/// Lower llvm.experimental.stackmap directly to its target opcode.
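+///
+/// For example (illustrative IR only):
+///   call void @llvm.experimental.stackmap(i64 42, i32 4, i32 %val)
+/// records the runtime location of %val under ID 42 and reserves a 4-byte
+/// shadow region for later patching.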
+void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
+  // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
+  //                                  [live variables...])
+
+  assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
+
+  SDValue Chain, InFlag, Callee, NullPtr;
+  SmallVector<SDValue, 32> Ops;
+
+  SDLoc DL = getCurSDLoc();
+  Callee = getValue(CI.getCalledValue());
+  NullPtr = DAG.getIntPtrConstant(0, DL, true);
+
+  // The stackmap intrinsic only records the live variables (the arguments
+  // passed to it) and emits NOPs (if requested). Unlike the patchpoint
+  // intrinsic, this won't be lowered to a function call. This means we don't
+  // have to worry about calling conventions and target-specific lowering
+  // code. Instead we perform the call lowering right here.
+  //
+  // chain, flag = CALLSEQ_START(chain, 0, 0)
+  // chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
+  // chain, flag = CALLSEQ_END(chain, 0, 0, flag)
+  //
+  Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
+  InFlag = Chain.getValue(1);
+
+  // Add the <id> and <numBytes> constants.
+  SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
+  Ops.push_back(DAG.getTargetConstant(
+      cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64));
+  SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
+  Ops.push_back(DAG.getTargetConstant(
+      cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL,
+      MVT::i32));
+
+  // Push live variables for the stack map.
+  addStackMapLiveVars(&CI, 2, DL, Ops, *this);
+
+  // We are not pushing any register mask info here on the operands list,
+  // because the stackmap doesn't clobber anything.
+
+  // Push the chain and the glue flag.
+  Ops.push_back(Chain);
+  Ops.push_back(InFlag);
+
+  // Create the STACKMAP node.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops);
+  Chain = SDValue(SM, 0);
+  InFlag = Chain.getValue(1);
+
+  Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
+
+  // Stackmaps don't generate values, so nothing goes into the NodeMap.
+
+  // Set the root to the target-lowered call chain.
+  DAG.setRoot(Chain);
+
+  // Inform the Frame Information that we have a stackmap in this function.
+  FuncInfo.MF->getFrameInfo().setHasStackMap();
+}
+
+/// Lower llvm.experimental.patchpoint directly to its target opcode.
+void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
+                                          const BasicBlock *EHPadBB) {
+  // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
+  //                                                 i32 <numBytes>,
+  //                                                 i8* <target>,
+  //                                                 i32 <numArgs>,
+  //                                                 [Args...],
+  //                                                 [live variables...])
+
+  CallingConv::ID CC = CS.getCallingConv();
+  bool IsAnyRegCC = CC == CallingConv::AnyReg;
+  bool HasDef = !CS->getType()->isVoidTy();
+  SDLoc dl = getCurSDLoc();
+  SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos));
+
+  // Handle immediate and symbolic callees.
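+  // As an illustrative example, the <target> operand may be a constant such
+  // as inttoptr (i64 4096 to i8*) or a symbol such as
+  // bitcast (void ()* @callee to i8*); both forms are rewritten into
+  // target-specific address nodes below.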
+ if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee)) + Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl, + /*isTarget=*/true); + else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(), + SDLoc(SymbolicCallee), + SymbolicCallee->getValueType(0)); + + // Get the real number of arguments participating in the call <numArgs> + SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); + unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); + + // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> + // Intrinsics include all meta-operands up to but not including CC. + unsigned NumMetaOpers = PatchPointOpers::CCPos; + assert(CS.arg_size() >= NumMetaOpers + NumArgs && + "Not enough arguments provided to the patchpoint intrinsic"); + + // For AnyRegCC the arguments are lowered later on manually. + unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; + Type *ReturnTy = + IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); + + TargetLowering::CallLoweringInfo CLI(DAG); + populateCallLoweringInfo(CLI, cast<CallBase>(CS.getInstruction()), + NumMetaOpers, NumCallArgs, Callee, ReturnTy, true); + std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); + + SDNode *CallEnd = Result.second.getNode(); + if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) + CallEnd = CallEnd->getOperand(0).getNode(); + + /// Get a call instruction from the call sequence chain. + /// Tail calls are not allowed. + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && + "Expected a callseq node."); + SDNode *Call = CallEnd->getOperand(0).getNode(); + bool HasGlue = Call->getGluedNode(); + + // Replace the target specific call node with the patchable intrinsic. + SmallVector<SDValue, 8> Ops; + + // Add the <id> and <numBytes> constants. + SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64)); + SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl, + MVT::i32)); + + // Add the callee. + Ops.push_back(Callee); + + // Adjust <numArgs> to account for any arguments that have been passed on the + // stack instead. + // Call Node: Chain, Target, {Args}, RegMask, [Glue] + unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); + NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs; + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32)); + + // Add the calling convention + Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32)); + + // Add the arguments we omitted previously. The register allocator should + // place these in any free register. + if (IsAnyRegCC) + for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) + Ops.push_back(getValue(CS.getArgument(i))); + + // Push the arguments from the call instruction up to the register mask. + SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; + Ops.append(Call->op_begin() + 2, e); + + // Push live variables for the stack map. + addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this); + + // Push the register mask info. 
+ if (HasGlue) + Ops.push_back(*(Call->op_end()-2)); + else + Ops.push_back(*(Call->op_end()-1)); + + // Push the chain (this is originally the first operand of the call, but + // becomes now the last or second to last operand). + Ops.push_back(*(Call->op_begin())); + + // Push the glue flag (last operand). + if (HasGlue) + Ops.push_back(*(Call->op_end()-1)); + + SDVTList NodeTys; + if (IsAnyRegCC && HasDef) { + // Create the return types based on the intrinsic definition + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 3> ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); + assert(ValueVTs.size() == 1 && "Expected only one return value type."); + + // There is always a chain and a glue type at the end + ValueVTs.push_back(MVT::Other); + ValueVTs.push_back(MVT::Glue); + NodeTys = DAG.getVTList(ValueVTs); + } else + NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + // Replace the target specific call node with a PATCHPOINT node. + MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, + dl, NodeTys, Ops); + + // Update the NodeMap. + if (HasDef) { + if (IsAnyRegCC) + setValue(CS.getInstruction(), SDValue(MN, 0)); + else + setValue(CS.getInstruction(), Result.first); + } + + // Fixup the consumers of the intrinsic. The chain and glue may be used in the + // call sequence. Furthermore the location of the chain and glue can change + // when the AnyReg calling convention is used and the intrinsic returns a + // value. + if (IsAnyRegCC && HasDef) { + SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; + SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + } else + DAG.ReplaceAllUsesWith(Call, MN); + DAG.DeleteNode(Call); + + // Inform the Frame Information that we have a patchpoint in this function. 
+ FuncInfo.MF->getFrameInfo().setHasPatchPoint(); +} + +void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, + unsigned Intrinsic) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2; + if (I.getNumArgOperands() > 1) + Op2 = getValue(I.getArgOperand(1)); + SDLoc dl = getCurSDLoc(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + SDValue Res; + FastMathFlags FMF; + if (isa<FPMathOperator>(I)) + FMF = I.getFastMathFlags(); + + switch (Intrinsic) { + case Intrinsic::experimental_vector_reduce_v2_fadd: + if (FMF.allowReassoc()) + Res = DAG.getNode(ISD::FADD, dl, VT, Op1, + DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2)); + else + Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); + break; + case Intrinsic::experimental_vector_reduce_v2_fmul: + if (FMF.allowReassoc()) + Res = DAG.getNode(ISD::FMUL, dl, VT, Op1, + DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2)); + else + Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); + break; + case Intrinsic::experimental_vector_reduce_add: + Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_mul: + Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_and: + Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_or: + Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_xor: + Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_smax: + Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_smin: + Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_umax: + Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_umin: + Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_fmax: + Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_fmin: + Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1); + break; + default: + llvm_unreachable("Unhandled vector reduce intrinsic"); + } + setValue(&I, Res); +} + +/// Returns an AttributeList representing the attributes applied to the return +/// value of the given call. +static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { + SmallVector<Attribute::AttrKind, 2> Attrs; + if (CLI.RetSExt) + Attrs.push_back(Attribute::SExt); + if (CLI.RetZExt) + Attrs.push_back(Attribute::ZExt); + if (CLI.IsInReg) + Attrs.push_back(Attribute::InReg); + + return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex, + Attrs); +} + +/// TargetLowering::LowerCallTo - This is the default LowerCallTo +/// implementation, which just calls LowerCall. +/// FIXME: When all targets are +/// migrated to using LowerCall, this hook should be integrated into SDISel. +std::pair<SDValue, SDValue> +TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { + // Handle the incoming return values from the call. 
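+  // For example (illustrative only), an i128 return value on a target with
+  // 64-bit registers is described here as two i64 register values, which are
+  // reassembled into the original type after the call is lowered.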
+ CLI.Ins.clear(); + Type *OrigRetTy = CLI.RetTy; + SmallVector<EVT, 4> RetTys; + SmallVector<uint64_t, 4> Offsets; + auto &DL = CLI.DAG.getDataLayout(); + ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); + + if (CLI.IsPostTypeLegalization) { + // If we are lowering a libcall after legalization, split the return type. + SmallVector<EVT, 4> OldRetTys; + SmallVector<uint64_t, 4> OldOffsets; + RetTys.swap(OldRetTys); + Offsets.swap(OldOffsets); + + for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) { + EVT RetVT = OldRetTys[i]; + uint64_t Offset = OldOffsets[i]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT); + unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8; + RetTys.append(NumRegs, RegisterVT); + for (unsigned j = 0; j != NumRegs; ++j) + Offsets.push_back(Offset + j * RegisterVTByteSZ); + } + } + + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); + + bool CanLowerReturn = + this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), + CLI.IsVarArg, Outs, CLI.RetTy->getContext()); + + SDValue DemoteStackSlot; + int DemoteStackIdx = -100; + if (!CanLowerReturn) { + // FIXME: equivalent assert? + // assert(!CS.hasInAllocaArgument() && + // "sret demotion is incompatible with inalloca"); + uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy); + unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy); + MachineFunction &MF = CLI.DAG.getMachineFunction(); + DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false); + Type *StackSlotPtrType = PointerType::get(CLI.RetTy, + DL.getAllocaAddrSpace()); + + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL)); + ArgListEntry Entry; + Entry.Node = DemoteStackSlot; + Entry.Ty = StackSlotPtrType; + Entry.IsSExt = false; + Entry.IsZExt = false; + Entry.IsInReg = false; + Entry.IsSRet = true; + Entry.IsNest = false; + Entry.IsByVal = false; + Entry.IsReturned = false; + Entry.IsSwiftSelf = false; + Entry.IsSwiftError = false; + Entry.Alignment = Align; + CLI.getArgs().insert(CLI.getArgs().begin(), Entry); + CLI.NumFixedArgs += 1; + CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); + + // sret demotion isn't compatible with tail-calls, since the sret argument + // points into the callers stack frame. + CLI.IsTailCall = false; + } else { + bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( + CLI.RetTy, CLI.CallConv, CLI.IsVarArg); + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + ISD::ArgFlagsTy Flags; + if (NeedsRegBlock) { + Flags.setInConsecutiveRegs(); + if (I == RetTys.size() - 1) + Flags.setInConsecutiveRegsLast(); + } + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), + CLI.CallConv, VT); + unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(), + CLI.CallConv, VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.Flags = Flags; + MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetTy->isPointerTy()) { + MyFlags.Flags.setPointer(); + MyFlags.Flags.setPointerAddrSpace( + cast<PointerType>(CLI.RetTy)->getAddressSpace()); + } + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } + } + } + + // We push in swifterror return as the last element of CLI.Ins. 
+  ArgListTy &Args = CLI.getArgs();
+  if (supportSwiftError()) {
+    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+      if (Args[i].IsSwiftError) {
+        ISD::InputArg MyFlags;
+        MyFlags.VT = getPointerTy(DL);
+        MyFlags.ArgVT = EVT(getPointerTy(DL));
+        MyFlags.Flags.setSwiftError();
+        CLI.Ins.push_back(MyFlags);
+      }
+    }
+  }
+
+  // Handle all of the outgoing arguments.
+  CLI.Outs.clear();
+  CLI.OutVals.clear();
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    SmallVector<EVT, 4> ValueVTs;
+    ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
+    // FIXME: Split arguments if CLI.IsPostTypeLegalization
+    Type *FinalType = Args[i].Ty;
+    if (Args[i].IsByVal)
+      FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
+    bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
+        FinalType, CLI.CallConv, CLI.IsVarArg);
+    for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
+         ++Value) {
+      EVT VT = ValueVTs[Value];
+      Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
+      SDValue Op = SDValue(Args[i].Node.getNode(),
+                           Args[i].Node.getResNo() + Value);
+      ISD::ArgFlagsTy Flags;
+
+      // Certain targets (such as MIPS), may have a different ABI alignment
+      // for a type depending on the context. Give the target a chance to
+      // specify the alignment it wants.
+      const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL));
+
+      if (Args[i].Ty->isPointerTy()) {
+        Flags.setPointer();
+        Flags.setPointerAddrSpace(
+            cast<PointerType>(Args[i].Ty)->getAddressSpace());
+      }
+      if (Args[i].IsZExt)
+        Flags.setZExt();
+      if (Args[i].IsSExt)
+        Flags.setSExt();
+      if (Args[i].IsInReg) {
+        // If we are using the vectorcall calling convention, a structure that
+        // is passed InReg is surely an HVA.
+        if (CLI.CallConv == CallingConv::X86_VectorCall &&
+            isa<StructType>(FinalType)) {
+          // The first value of a structure is marked.
+          if (0 == Value)
+            Flags.setHvaStart();
+          Flags.setHva();
+        }
+        // Set InReg Flag
+        Flags.setInReg();
+      }
+      if (Args[i].IsSRet)
+        Flags.setSRet();
+      if (Args[i].IsSwiftSelf)
+        Flags.setSwiftSelf();
+      if (Args[i].IsSwiftError)
+        Flags.setSwiftError();
+      if (Args[i].IsByVal)
+        Flags.setByVal();
+      if (Args[i].IsInAlloca) {
+        Flags.setInAlloca();
+        // Set the byval flag for CCAssignFn callbacks that don't know about
+        // inalloca. This way we can know how many bytes we should've allocated
+        // and how many bytes a callee cleanup function will pop. If we port
+        // inalloca to more targets, we'll have to add custom inalloca handling
+        // in the various CC lowering callbacks.
+        Flags.setByVal();
+      }
+      if (Args[i].IsByVal || Args[i].IsInAlloca) {
+        PointerType *Ty = cast<PointerType>(Args[i].Ty);
+        Type *ElementTy = Ty->getElementType();
+
+        unsigned FrameSize = DL.getTypeAllocSize(
+            Args[i].ByValType ? Args[i].ByValType : ElementTy);
+        Flags.setByValSize(FrameSize);
+
+        // For ByVal, size and alignment should be passed from FE. BE will
+        // guess if this info is not there but there are cases it cannot get
+        // right.
+ unsigned FrameAlign; + if (Args[i].Alignment) + FrameAlign = Args[i].Alignment; + else + FrameAlign = getByValTypeAlignment(ElementTy, DL); + Flags.setByValAlign(Align(FrameAlign)); + } + if (Args[i].IsNest) + Flags.setNest(); + if (NeedsRegBlock) + Flags.setInConsecutiveRegs(); + Flags.setOrigAlign(OriginalAlignment); + + MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), + CLI.CallConv, VT); + unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(), + CLI.CallConv, VT); + SmallVector<SDValue, 4> Parts(NumParts); + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (Args[i].IsSExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (Args[i].IsZExt) + ExtendKind = ISD::ZERO_EXTEND; + + // Conservatively only handle 'returned' on non-vectors that can be lowered, + // for now. + if (Args[i].IsReturned && !Op.getValueType().isVector() && + CanLowerReturn) { + assert((CLI.RetTy == Args[i].Ty || + (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() && + CLI.RetTy->getPointerAddressSpace() == + Args[i].Ty->getPointerAddressSpace())) && + RetTys.size() == NumValues && "unexpected use of 'returned'"); + // Before passing 'returned' to the target lowering code, ensure that + // either the register MVT and the actual EVT are the same size or that + // the return value and argument are extended in the same way; in these + // cases it's safe to pass the argument register value unchanged as the + // return register value (although it's at the target's option whether + // to do so) + // TODO: allow code generation to take advantage of partially preserved + // registers rather than clobbering the entire register when the + // parameter extension method is not compatible with the return + // extension method + if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) || + (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt && + CLI.RetZExt == Args[i].IsZExt)) + Flags.setReturned(); + } + + getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, + CLI.CS.getInstruction(), CLI.CallConv, ExtendKind); + + for (unsigned j = 0; j != NumParts; ++j) { + // if it isn't first piece, alignment must be 1 + ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, + i < CLI.NumFixedArgs, + i, j*Parts[j].getValueType().getStoreSize()); + if (NumParts > 1 && j == 0) + MyFlags.Flags.setSplit(); + else if (j != 0) { + MyFlags.Flags.setOrigAlign(Align::None()); + if (j == NumParts - 1) + MyFlags.Flags.setSplitEnd(); + } + + CLI.Outs.push_back(MyFlags); + CLI.OutVals.push_back(Parts[j]); + } + + if (NeedsRegBlock && Value == NumValues - 1) + CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast(); + } + } + + SmallVector<SDValue, 4> InVals; + CLI.Chain = LowerCall(CLI, InVals); + + // Update CLI.InVals to use outside of this function. + CLI.InVals = InVals; + + // Verify that the target's LowerCall behaved as expected. + assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other && + "LowerCall didn't return a valid chain!"); + assert((!CLI.IsTailCall || InVals.empty()) && + "LowerCall emitted a return value for a tail call!"); + assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) && + "LowerCall didn't emit the correct number of values!"); + + // For a tail call, the return value is merely live-out and there aren't + // any nodes in the DAG representing it. Return a special value to + // indicate that a tail call has been emitted and no more Instructions + // should be processed in the current block. 
+  if (CLI.IsTailCall) {
+    CLI.DAG.setRoot(CLI.Chain);
+    return std::make_pair(SDValue(), SDValue());
+  }
+
+#ifndef NDEBUG
+  for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
+    assert(InVals[i].getNode() && "LowerCall emitted a null value!");
+    assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
+           "LowerCall emitted a value with the wrong type!");
+  }
+#endif
+
+  SmallVector<SDValue, 4> ReturnValues;
+  if (!CanLowerReturn) {
+    // The instruction result is the result of loading from the
+    // hidden sret parameter.
+    SmallVector<EVT, 1> PVTs;
+    Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace());
+
+    ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
+    assert(PVTs.size() == 1 && "Pointers should fit in one register");
+    EVT PtrVT = PVTs[0];
+
+    unsigned NumValues = RetTys.size();
+    ReturnValues.resize(NumValues);
+    SmallVector<SDValue, 4> Chains(NumValues);
+
+    // An aggregate return value cannot wrap around the address space, so
+    // offsets to its parts don't wrap either.
+    SDNodeFlags Flags;
+    Flags.setNoUnsignedWrap(true);
+
+    for (unsigned i = 0; i < NumValues; ++i) {
+      SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
+                                    CLI.DAG.getConstant(Offsets[i], CLI.DL,
+                                                        PtrVT), Flags);
+      SDValue L = CLI.DAG.getLoad(
+          RetTys[i], CLI.DL, CLI.Chain, Add,
+          MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
+                                            DemoteStackIdx, Offsets[i]),
+          /* Alignment = */ 1);
+      ReturnValues[i] = L;
+      Chains[i] = L.getValue(1);
+    }
+
+    CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
+  } else {
+    // Collect the legal value parts into potentially illegal values
+    // that correspond to the original function's return values.
+    Optional<ISD::NodeType> AssertOp;
+    if (CLI.RetSExt)
+      AssertOp = ISD::AssertSext;
+    else if (CLI.RetZExt)
+      AssertOp = ISD::AssertZext;
+    unsigned CurReg = 0;
+    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+      EVT VT = RetTys[I];
+      MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
+                                                     CLI.CallConv, VT);
+      unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
+                                                       CLI.CallConv, VT);
+
+      ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
+                                              NumRegs, RegisterVT, VT, nullptr,
+                                              CLI.CallConv, AssertOp));
+      CurReg += NumRegs;
+    }
+
+    // For a function returning void, there is no return value. We can't
+    // create such a node, so we just return a null return value in that
+    // case; nothing will actually look at the value.
+ if (ReturnValues.empty()) + return std::make_pair(SDValue(), CLI.Chain); + } + + SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, + CLI.DAG.getVTList(RetTys), ReturnValues); + return std::make_pair(Res, CLI.Chain); +} + +void TargetLowering::LowerOperationWrapper(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const { + if (SDValue Res = LowerOperation(SDValue(N, 0), DAG)) + Results.push_back(Res); +} + +SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + llvm_unreachable("LowerOperation not implemented for this target!"); +} + +void +SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { + SDValue Op = getNonRegisterValue(V); + assert((Op.getOpcode() != ISD::CopyFromReg || + cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && + "Copy from a reg to the same reg!"); + assert(!Register::isPhysicalRegister(Reg) && "Is a physreg"); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // If this is an InlineAsm we have to match the registers required, not the + // notional registers required by the type. + + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(), + None); // This is not an ABI copy. + SDValue Chain = DAG.getEntryNode(); + + ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == + FuncInfo.PreferredExtendType.end()) + ? ISD::ANY_EXTEND + : FuncInfo.PreferredExtendType[V]; + RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType); + PendingExports.push_back(Chain); +} + +#include "llvm/CodeGen/SelectionDAGISel.h" + +/// isOnlyUsedInEntryBlock - If the specified argument is only used in the +/// entry block, return true. This includes arguments used by switches, since +/// the switch may expand into multiple basic blocks. +static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { + // With FastISel active, we may be splitting blocks, so force creation + // of virtual registers for all non-dead arguments. + if (FastISel) + return A->use_empty(); + + const BasicBlock &Entry = A->getParent()->front(); + for (const User *U : A->users()) + if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U)) + return false; // Use not in entry block. + + return true; +} + +using ArgCopyElisionMapTy = + DenseMap<const Argument *, + std::pair<const AllocaInst *, const StoreInst *>>; + +/// Scan the entry block of the function in FuncInfo for arguments that look +/// like copies into a local alloca. Record any copied arguments in +/// ArgCopyElisionCandidates. +static void +findArgumentCopyElisionCandidates(const DataLayout &DL, + FunctionLoweringInfo *FuncInfo, + ArgCopyElisionMapTy &ArgCopyElisionCandidates) { + // Record the state of every static alloca used in the entry block. Argument + // allocas are all used in the entry block, so we need approximately as many + // entries as we have arguments. + enum StaticAllocaInfo { Unknown, Clobbered, Elidable }; + SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas; + unsigned NumArgs = FuncInfo->Fn->arg_size(); + StaticAllocas.reserve(NumArgs * 2); + + auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * { + if (!V) + return nullptr; + V = V->stripPointerCasts(); + const auto *AI = dyn_cast<AllocaInst>(V); + if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI)) + return nullptr; + auto Iter = StaticAllocas.insert({AI, Unknown}); + return &Iter.first->second; + }; + + // Look for stores of arguments to static allocas. 
Look through bitcasts and + // GEPs to handle type coercions, as long as the alloca is fully initialized + // by the store. Any non-store use of an alloca escapes it and any subsequent + // unanalyzed store might write it. + // FIXME: Handle structs initialized with multiple stores. + for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) { + // Look for stores, and handle non-store uses conservatively. + const auto *SI = dyn_cast<StoreInst>(&I); + if (!SI) { + // We will look through cast uses, so ignore them completely. + if (I.isCast()) + continue; + // Ignore debug info intrinsics, they don't escape or store to allocas. + if (isa<DbgInfoIntrinsic>(I)) + continue; + // This is an unknown instruction. Assume it escapes or writes to all + // static alloca operands. + for (const Use &U : I.operands()) { + if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U)) + *Info = StaticAllocaInfo::Clobbered; + } + continue; + } + + // If the stored value is a static alloca, mark it as escaped. + if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand())) + *Info = StaticAllocaInfo::Clobbered; + + // Check if the destination is a static alloca. + const Value *Dst = SI->getPointerOperand()->stripPointerCasts(); + StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst); + if (!Info) + continue; + const AllocaInst *AI = cast<AllocaInst>(Dst); + + // Skip allocas that have been initialized or clobbered. + if (*Info != StaticAllocaInfo::Unknown) + continue; + + // Check if the stored value is an argument, and that this store fully + // initializes the alloca. Don't elide copies from the same argument twice. + const Value *Val = SI->getValueOperand()->stripPointerCasts(); + const auto *Arg = dyn_cast<Argument>(Val); + if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() || + Arg->getType()->isEmptyTy() || + DL.getTypeStoreSize(Arg->getType()) != + DL.getTypeAllocSize(AI->getAllocatedType()) || + ArgCopyElisionCandidates.count(Arg)) { + *Info = StaticAllocaInfo::Clobbered; + continue; + } + + LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI + << '\n'); + + // Mark this alloca and store for argument copy elision. + *Info = StaticAllocaInfo::Elidable; + ArgCopyElisionCandidates.insert({Arg, {AI, SI}}); + + // Stop scanning if we've seen all arguments. This will happen early in -O0 + // builds, which is useful, because -O0 builds have large entry blocks and + // many allocas. + if (ArgCopyElisionCandidates.size() == NumArgs) + break; + } +} + +/// Try to elide argument copies from memory into a local alloca. Succeeds if +/// ArgVal is a load from a suitable fixed stack object. +static void tryToElideArgumentCopy( + FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains, + DenseMap<int, int> &ArgCopyElisionFrameIndexMap, + SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs, + ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg, + SDValue ArgVal, bool &ArgHasUses) { + // Check if this is a load from a fixed stack object. + auto *LNode = dyn_cast<LoadSDNode>(ArgVal); + if (!LNode) + return; + auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()); + if (!FINode) + return; + + // Check that the fixed stack object is the right size and alignment. + // Look at the alignment that the user wrote on the alloca instead of looking + // at the stack object. 
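+  // The pattern being elided is (illustrative IR only):
+  //   %x.addr = alloca i32
+  //   store i32 %x, i32* %x.addr
+  // where the argument %x was passed on the stack; the alloca can then reuse
+  // the argument's own fixed stack slot instead of copying into a new one.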
+  auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
+  assert(ArgCopyIter != ArgCopyElisionCandidates.end());
+  const AllocaInst *AI = ArgCopyIter->second.first;
+  int FixedIndex = FINode->getIndex();
+  int &AllocaIndex = FuncInfo->StaticAllocaMap[AI];
+  int OldIndex = AllocaIndex;
+  MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
+  if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
+    LLVM_DEBUG(
+        dbgs() << "  argument copy elision failed due to bad fixed stack "
+                  "object size\n");
+    return;
+  }
+  unsigned RequiredAlignment = AI->getAlignment();
+  if (!RequiredAlignment) {
+    RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment(
+        AI->getAllocatedType());
+  }
+  if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
+    LLVM_DEBUG(dbgs() << "  argument copy elision failed: alignment of alloca "
+                         "greater than stack argument alignment ("
+                      << RequiredAlignment << " vs "
+                      << MFI.getObjectAlignment(FixedIndex) << ")\n");
+    return;
+  }
+
+  // Perform the elision. Delete the old stack object and replace its only use
+  // in the variable info map. Mark the stack object as mutable.
+  LLVM_DEBUG({
+    dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
+           << "  Replacing frame index " << OldIndex << " with " << FixedIndex
+           << '\n';
+  });
+  MFI.RemoveStackObject(OldIndex);
+  MFI.setIsImmutableObjectIndex(FixedIndex, false);
+  AllocaIndex = FixedIndex;
+  ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
+  Chains.push_back(ArgVal.getValue(1));
+
+  // Avoid emitting code for the store implementing the copy.
+  const StoreInst *SI = ArgCopyIter->second.second;
+  ElidedArgCopyInstrs.insert(SI);
+
+  // Check for uses of the argument again so that we can avoid exporting
+  // ArgVal if it isn't used by anything other than the store.
+  for (const Value *U : Arg.users()) {
+    if (U != SI) {
+      ArgHasUses = true;
+      break;
+    }
+  }
+}
+
+void SelectionDAGISel::LowerArguments(const Function &F) {
+  SelectionDAG &DAG = SDB->DAG;
+  SDLoc dl = SDB->getCurSDLoc();
+  const DataLayout &DL = DAG.getDataLayout();
+  SmallVector<ISD::InputArg, 16> Ins;
+
+  if (!FuncInfo->CanLowerReturn) {
+    // Put in an sret pointer parameter before all the other parameters.
+    SmallVector<EVT, 1> ValueVTs;
+    ComputeValueVTs(*TLI, DAG.getDataLayout(),
+                    F.getReturnType()->getPointerTo(
+                        DAG.getDataLayout().getAllocaAddrSpace()),
+                    ValueVTs);
+
+    // NOTE: Assuming that a pointer will never break down to more than one VT
+    // or one register.
+    ISD::ArgFlagsTy Flags;
+    Flags.setSRet();
+    MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
+    ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
+                         ISD::InputArg::NoArgIndex, 0);
+    Ins.push_back(RetArg);
+  }
+
+  // Look for stores of arguments to static allocas. Mark such arguments with
+  // a flag to ask the target to give us the memory location of that argument
+  // if available.
+  ArgCopyElisionMapTy ArgCopyElisionCandidates;
+  findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);
+
+  // Set up the incoming argument description vector.
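+  // For example (illustrative only), an i64 argument on a 32-bit target is
+  // described as two i32 ISD::InputArg entries, the first flagged 'split'
+  // and the last flagged 'split end' (see the NumRegs loop below).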
+ for (const Argument &Arg : F.args()) { + unsigned ArgNo = Arg.getArgNo(); + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); + bool isArgValueUsed = !Arg.use_empty(); + unsigned PartBase = 0; + Type *FinalType = Arg.getType(); + if (Arg.hasAttribute(Attribute::ByVal)) + FinalType = Arg.getParamByValType(); + bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( + FinalType, F.getCallingConv(), F.isVarArg()); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + EVT VT = ValueVTs[Value]; + Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); + ISD::ArgFlagsTy Flags; + + // Certain targets (such as MIPS), may have a different ABI alignment + // for a type depending on the context. Give the target a chance to + // specify the alignment it wants. + const Align OriginalAlignment( + TLI->getABIAlignmentForCallingConv(ArgTy, DL)); + + if (Arg.getType()->isPointerTy()) { + Flags.setPointer(); + Flags.setPointerAddrSpace( + cast<PointerType>(Arg.getType())->getAddressSpace()); + } + if (Arg.hasAttribute(Attribute::ZExt)) + Flags.setZExt(); + if (Arg.hasAttribute(Attribute::SExt)) + Flags.setSExt(); + if (Arg.hasAttribute(Attribute::InReg)) { + // If we are using vectorcall calling convention, a structure that is + // passed InReg - is surely an HVA + if (F.getCallingConv() == CallingConv::X86_VectorCall && + isa<StructType>(Arg.getType())) { + // The first value of a structure is marked + if (0 == Value) + Flags.setHvaStart(); + Flags.setHva(); + } + // Set InReg Flag + Flags.setInReg(); + } + if (Arg.hasAttribute(Attribute::StructRet)) + Flags.setSRet(); + if (Arg.hasAttribute(Attribute::SwiftSelf)) + Flags.setSwiftSelf(); + if (Arg.hasAttribute(Attribute::SwiftError)) + Flags.setSwiftError(); + if (Arg.hasAttribute(Attribute::ByVal)) + Flags.setByVal(); + if (Arg.hasAttribute(Attribute::InAlloca)) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling + // in the various CC lowering callbacks. + Flags.setByVal(); + } + if (F.getCallingConv() == CallingConv::X86_INTR) { + // IA Interrupt passes frame (1st parameter) by value in the stack. + if (ArgNo == 0) + Flags.setByVal(); + } + if (Flags.isByVal() || Flags.isInAlloca()) { + Type *ElementTy = Arg.getParamByValType(); + + // For ByVal, size and alignment should be passed from FE. BE will + // guess if this info is not there but there are cases it cannot get + // right. 
+ unsigned FrameSize = DL.getTypeAllocSize(Arg.getParamByValType()); + Flags.setByValSize(FrameSize); + + unsigned FrameAlign; + if (Arg.getParamAlignment()) + FrameAlign = Arg.getParamAlignment(); + else + FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); + Flags.setByValAlign(Align(FrameAlign)); + } + if (Arg.hasAttribute(Attribute::Nest)) + Flags.setNest(); + if (NeedsRegBlock) + Flags.setInConsecutiveRegs(); + Flags.setOrigAlign(OriginalAlignment); + if (ArgCopyElisionCandidates.count(&Arg)) + Flags.setCopyElisionCandidate(); + if (Arg.hasAttribute(Attribute::Returned)) + Flags.setReturned(); + + MVT RegisterVT = TLI->getRegisterTypeForCallingConv( + *CurDAG->getContext(), F.getCallingConv(), VT); + unsigned NumRegs = TLI->getNumRegistersForCallingConv( + *CurDAG->getContext(), F.getCallingConv(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, + ArgNo, PartBase+i*RegisterVT.getStoreSize()); + if (NumRegs > 1 && i == 0) + MyFlags.Flags.setSplit(); + // if it isn't first piece, alignment must be 1 + else if (i > 0) { + MyFlags.Flags.setOrigAlign(Align::None()); + if (i == NumRegs - 1) + MyFlags.Flags.setSplitEnd(); + } + Ins.push_back(MyFlags); + } + if (NeedsRegBlock && Value == NumValues - 1) + Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast(); + PartBase += VT.getStoreSize(); + } + } + + // Call the target to set up the argument values. + SmallVector<SDValue, 8> InVals; + SDValue NewRoot = TLI->LowerFormalArguments( + DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals); + + // Verify that the target's LowerFormalArguments behaved as expected. + assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && + "LowerFormalArguments didn't return a valid chain!"); + assert(InVals.size() == Ins.size() && + "LowerFormalArguments didn't emit the correct number of values!"); + LLVM_DEBUG({ + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && + "LowerFormalArguments emitted a null value!"); + assert(EVT(Ins[i].VT) == InVals[i].getValueType() && + "LowerFormalArguments emitted a value with the wrong type!"); + } + }); + + // Update the DAG with the new chain value resulting from argument lowering. + DAG.setRoot(NewRoot); + + // Set up the argument values. + unsigned i = 0; + if (!FuncInfo->CanLowerReturn) { + // Create a virtual register for the sret pointer, and put in a copy + // from the sret argument into it. + SmallVector<EVT, 1> ValueVTs; + ComputeValueVTs(*TLI, DAG.getDataLayout(), + F.getReturnType()->getPointerTo( + DAG.getDataLayout().getAllocaAddrSpace()), + ValueVTs); + MVT VT = ValueVTs[0].getSimpleVT(); + MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); + Optional<ISD::NodeType> AssertOp = None; + SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, + nullptr, F.getCallingConv(), AssertOp); + + MachineFunction& MF = SDB->DAG.getMachineFunction(); + MachineRegisterInfo& RegInfo = MF.getRegInfo(); + Register SRetReg = + RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); + FuncInfo->DemoteRegister = SRetReg; + NewRoot = + SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); + DAG.setRoot(NewRoot); + + // i indexes lowered arguments. Bump it past the hidden sret argument. 
+    ++i;
+  }
+
+  SmallVector<SDValue, 4> Chains;
+  DenseMap<int, int> ArgCopyElisionFrameIndexMap;
+  for (const Argument &Arg : F.args()) {
+    SmallVector<SDValue, 4> ArgValues;
+    SmallVector<EVT, 4> ValueVTs;
+    ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
+    unsigned NumValues = ValueVTs.size();
+    if (NumValues == 0)
+      continue;
+
+    bool ArgHasUses = !Arg.use_empty();
+
+    // Elide the copying store if the target loaded this argument from a
+    // suitable fixed stack object.
+    if (Ins[i].Flags.isCopyElisionCandidate()) {
+      tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
+                             ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
+                             InVals[i], ArgHasUses);
+    }
+
+    // If this argument is unused, remember its value; it is used to generate
+    // debugging information.
+    bool isSwiftErrorArg =
+        TLI->supportSwiftError() &&
+        Arg.hasAttribute(Attribute::SwiftError);
+    if (!ArgHasUses && !isSwiftErrorArg) {
+      SDB->setUnusedArgValue(&Arg, InVals[i]);
+
+      // Also remember any frame index for use in FastISel.
+      if (FrameIndexSDNode *FI =
+          dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
+        FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
+    }
+
+    for (unsigned Val = 0; Val != NumValues; ++Val) {
+      EVT VT = ValueVTs[Val];
+      MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(),
+                                                      F.getCallingConv(), VT);
+      unsigned NumParts = TLI->getNumRegistersForCallingConv(
+          *CurDAG->getContext(), F.getCallingConv(), VT);
+
+      // Even an apparent 'unused' swifterror argument needs to be returned,
+      // so we generate a copy for it that can be used on return from the
+      // function.
+      if (ArgHasUses || isSwiftErrorArg) {
+        Optional<ISD::NodeType> AssertOp;
+        if (Arg.hasAttribute(Attribute::SExt))
+          AssertOp = ISD::AssertSext;
+        else if (Arg.hasAttribute(Attribute::ZExt))
+          AssertOp = ISD::AssertZext;
+
+        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
+                                             PartVT, VT, nullptr,
+                                             F.getCallingConv(), AssertOp));
+      }
+
+      i += NumParts;
+    }
+
+    // We don't need to do anything else for unused arguments.
+    if (ArgValues.empty())
+      continue;
+
+    // Note down the frame index.
+    if (FrameIndexSDNode *FI =
+        dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+      FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
+
+    SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
+                                     SDB->getCurSDLoc());
+
+    SDB->setValue(&Arg, Res);
+    if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+      // We want to associate the argument with the frame index, among the
+      // involved operands, that corresponds to the lowest address. The
+      // getCopyFromParts function, called earlier, is swapping the order of
+      // the operands to BUILD_PAIR depending on endianness. The result of
+      // that swapping is that the least significant bits of the argument will
+      // be in the first operand of the BUILD_PAIR node, and the most
+      // significant bits will be in the second operand.
+      unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
+      if (LoadSDNode *LNode =
+          dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
+        if (FrameIndexSDNode *FI =
+            dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+          FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
+    }
+
+    // Analyses past this point are naive and don't expect an assertion.
+    if (Res.getOpcode() == ISD::AssertZext)
+      Res = Res.getOperand(0);
+
+    // Update the SwiftErrorVRegDefMap.
+    if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
+      unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+      if (Register::isVirtualRegister(Reg))
+        SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
+                                   Reg);
+    }
+
+    // If this argument is live outside of the entry block, insert a copy from
+    // wherever we got it to the vreg that other BBs will reference it as.
+    if (Res.getOpcode() == ISD::CopyFromReg) {
+      // If we can, though, try to skip creating an unnecessary vreg.
+      // FIXME: This isn't very clean... it would be nice to make this more
+      // general.
+      unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+      if (Register::isVirtualRegister(Reg)) {
+        FuncInfo->ValueMap[&Arg] = Reg;
+        continue;
+      }
+    }
+    if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
+      FuncInfo->InitializeRegForValue(&Arg);
+      SDB->CopyToExportRegsIfNeeded(&Arg);
+    }
+  }
+
+  if (!Chains.empty()) {
+    Chains.push_back(NewRoot);
+    NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+  }
+
+  DAG.setRoot(NewRoot);
+
+  assert(i == InVals.size() && "Argument register count mismatch!");
+
+  // If any argument copy elisions occurred and we have debug info, update the
+  // stale frame indices used in the dbg.declare variable info table.
+  MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo =
+      MF->getVariableDbgInfo();
+  if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
+    for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
+      auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
+      if (I != ArgCopyElisionFrameIndexMap.end())
+        VI.Slot = I->second;
+    }
+  }
+
+  // Finally, if the target has anything special to do, allow it to do so.
+  EmitFunctionEntryCode();
+}
+
+/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
+/// ensure constants are generated when needed. Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input. We cannot just
+/// directly add them, because expansion might result in multiple MBBs for one
+/// BB. As such, the start of the BB might correspond to a different MBB than
+/// the end.
+void
+SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+  const Instruction *TI = LLVMBB->getTerminator();
+
+  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+  // Check PHI nodes in successors that expect a value to be available from
+  // this block.
+  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+    const BasicBlock *SuccBB = TI->getSuccessor(succ);
+    if (!isa<PHINode>(SuccBB->begin())) continue;
+    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+    // If this terminator has multiple identical successors (common for
+    // switches), only handle each succ once.
+    if (!SuccsHandled.insert(SuccMBB).second)
+      continue;
+
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+    // At this point we know that there is a 1-1 correspondence between LLVM
+    // PHI nodes and Machine PHI nodes, but the incoming operands have not been
+    // emitted yet.
+    for (const PHINode &PN : SuccBB->phis()) {
+      // Ignore dead PHIs.
+      if (PN.use_empty())
+        continue;
+
+      // Skip empty types.
+      if (PN.getType()->isEmptyTy())
+        continue;
+
+      unsigned Reg;
+      const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
+
+      if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
+        unsigned &RegOut = ConstantsOut[C];
+        if (RegOut == 0) {
+          RegOut = FuncInfo.CreateRegs(C);
+          CopyValueToVirtualRegister(C, RegOut);
+        }
+        Reg = RegOut;
+      } else {
+        DenseMap<const Value *, unsigned>::iterator I =
+            FuncInfo.ValueMap.find(PHIOp);
+        if (I != FuncInfo.ValueMap.end())
+          Reg = I->second;
+        else {
+          assert(isa<AllocaInst>(PHIOp) &&
+                 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+                 "Didn't codegen value into a register!??");
+          Reg = FuncInfo.CreateRegs(PHIOp);
+          CopyValueToVirtualRegister(PHIOp, Reg);
+        }
+      }
+
+      // Remember that this register needs to be added to the machine PHI node
+      // as the input for this MBB.
+      SmallVector<EVT, 4> ValueVTs;
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
+      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+        EVT VT = ValueVTs[vti];
+        unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
+        for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+          FuncInfo.PHINodesToUpdate.push_back(
+              std::make_pair(&*MBBI++, Reg + i));
+        Reg += NumRegisters;
+      }
+    }
+  }
+
+  ConstantsOut.clear();
+}
+
+/// Add a successor MBB to ParentMBB, creating a new MachineBasicBlock for BB
+/// if SuccMBB is null.
+MachineBasicBlock *
+SelectionDAGBuilder::StackProtectorDescriptor::
+AddSuccessorMBB(const BasicBlock *BB,
+                MachineBasicBlock *ParentMBB,
+                bool IsLikely,
+                MachineBasicBlock *SuccMBB) {
+  // If SuccBB has not been created yet, create it.
+  if (!SuccMBB) {
+    MachineFunction *MF = ParentMBB->getParent();
+    MachineFunction::iterator BBI(ParentMBB);
+    SuccMBB = MF->CreateMachineBasicBlock(BB);
+    MF->insert(++BBI, SuccMBB);
+  }
+  // Add it as a successor of ParentMBB.
+  ParentMBB->addSuccessor(
+      SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
+  return SuccMBB;
+}
+
+MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
+  MachineFunction::iterator I(MBB);
+  if (++I == FuncInfo.MF->end())
+    return nullptr;
+  return &*I;
+}
+
+/// During lowering new call nodes can be created (such as memset, etc.).
+/// Those will become new roots of the current DAG, but complications arise
+/// when they are tail calls. In such cases, the call lowering will update
+/// the root, but the builder still needs to know that a tail call has been
+/// lowered in order to avoid generating an additional return.
+void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
+  // If the node is null, we do have a tail call.
+  if (MaybeTC.getNode() != nullptr)
+    DAG.setRoot(MaybeTC);
+  else
+    HasTailCall = true;
+}
+
+void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
+                                        MachineBasicBlock *SwitchMBB,
+                                        MachineBasicBlock *DefaultMBB) {
+  MachineFunction *CurMF = FuncInfo.MF;
+  MachineBasicBlock *NextMBB = nullptr;
+  MachineFunction::iterator BBI(W.MBB);
+  if (++BBI != FuncInfo.MF->end())
+    NextMBB = &*BBI;
+
+  unsigned Size = W.LastCluster - W.FirstCluster + 1;
+
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
+
+  if (Size == 2 && W.MBB == SwitchMBB) {
+    // If any two of the cases have the same destination, and if one value
+    // is the same as the other, but has one bit unset that the other has set,
+    // use bit manipulation to do two compares at once.
For example: + // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" + // TODO: This could be extended to merge any 2 cases in switches with 3 + // cases. + // TODO: Handle cases where W.CaseBB != SwitchBB. + CaseCluster &Small = *W.FirstCluster; + CaseCluster &Big = *W.LastCluster; + + if (Small.Low == Small.High && Big.Low == Big.High && + Small.MBB == Big.MBB) { + const APInt &SmallValue = Small.Low->getValue(); + const APInt &BigValue = Big.Low->getValue(); + + // Check that there is only one bit different. + APInt CommonBit = BigValue ^ SmallValue; + if (CommonBit.isPowerOf2()) { + SDValue CondLHS = getValue(Cond); + EVT VT = CondLHS.getValueType(); + SDLoc DL = getCurSDLoc(); + + SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, + DAG.getConstant(CommonBit, DL, VT)); + SDValue Cond = DAG.getSetCC( + DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT), + ISD::SETEQ); + + // Update successor info. + // Both Small and Big will jump to Small.BB, so we sum up the + // probabilities. + addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob); + if (BPI) + addSuccessorWithProb( + SwitchMBB, DefaultMBB, + // The default destination is the first successor in IR. + BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0)); + else + addSuccessorWithProb(SwitchMBB, DefaultMBB); + + // Insert the true branch. + SDValue BrCond = + DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond, + DAG.getBasicBlock(Small.MBB)); + // Insert the false branch. + BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, + DAG.getBasicBlock(DefaultMBB)); + + DAG.setRoot(BrCond); + return; + } + } + } + + if (TM.getOptLevel() != CodeGenOpt::None) { + // Here, we order cases by probability so the most likely case will be + // checked first. However, two clusters can have the same probability in + // which case their relative ordering is non-deterministic. So we use Low + // as a tie-breaker as clusters are guaranteed to never overlap. + llvm::sort(W.FirstCluster, W.LastCluster + 1, + [](const CaseCluster &a, const CaseCluster &b) { + return a.Prob != b.Prob ? + a.Prob > b.Prob : + a.Low->getValue().slt(b.Low->getValue()); + }); + + // Rearrange the case blocks so that the last one falls through if possible + // without changing the order of probabilities. + for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) { + --I; + if (I->Prob > W.LastCluster->Prob) + break; + if (I->Kind == CC_Range && I->MBB == NextMBB) { + std::swap(*I, *W.LastCluster); + break; + } + } + } + + // Compute total probability. + BranchProbability DefaultProb = W.DefaultProb; + BranchProbability UnhandledProbs = DefaultProb; + for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) + UnhandledProbs += I->Prob; + + MachineBasicBlock *CurMBB = W.MBB; + for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) { + bool FallthroughUnreachable = false; + MachineBasicBlock *Fallthrough; + if (I == W.LastCluster) { + // For the last cluster, fall through to the default destination. + Fallthrough = DefaultMBB; + FallthroughUnreachable = isa<UnreachableInst>( + DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg()); + } else { + Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock()); + CurMF->insert(BBI, Fallthrough); + // Put Cond in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(Cond); + } + UnhandledProbs -= I->Prob; + + switch (I->Kind) { + case CC_JumpTable: { + // FIXME: Optimize away range check based on pivot comparisons. 
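+      // Editorial aside (a minimal standalone sketch, not original source
+      // text): the one-bit case-merging test near the top of this function
+      // can be checked in isolation; 'mergeableCases' is a hypothetical name.
+      //
+      //   // (X == A || X == B) folds to one compare iff A ^ B has exactly
+      //   // one set bit. For A = 4, B = 6: 4 ^ 6 == 2, a power of two, so
+      //   // (X == 6 || X == 4) becomes ((X | 2) == 6).
+      //   static bool mergeableCases(uint64_t A, uint64_t B) {
+      //     uint64_t Diff = A ^ B;
+      //     return Diff != 0 && (Diff & (Diff - 1)) == 0;
+      //   }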
+ JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first; + SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second; + + // The jump block hasn't been inserted yet; insert it here. + MachineBasicBlock *JumpMBB = JT->MBB; + CurMF->insert(BBI, JumpMBB); + + auto JumpProb = I->Prob; + auto FallthroughProb = UnhandledProbs; + + // If the default statement is a target of the jump table, we evenly + // distribute the default probability to successors of CurMBB. Also + // update the probability on the edge from JumpMBB to Fallthrough. + for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(), + SE = JumpMBB->succ_end(); + SI != SE; ++SI) { + if (*SI == DefaultMBB) { + JumpProb += DefaultProb / 2; + FallthroughProb -= DefaultProb / 2; + JumpMBB->setSuccProbability(SI, DefaultProb / 2); + JumpMBB->normalizeSuccProbs(); + break; + } + } + + if (FallthroughUnreachable) { + // Skip the range check if the fallthrough block is unreachable. + JTH->OmitRangeCheck = true; + } + + if (!JTH->OmitRangeCheck) + addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); + addSuccessorWithProb(CurMBB, JumpMBB, JumpProb); + CurMBB->normalizeSuccProbs(); + + // The jump table header will be inserted in our current block, do the + // range check, and fall through to our fallthrough block. + JTH->HeaderBB = CurMBB; + JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader. + + // If we're in the right place, emit the jump table header right now. + if (CurMBB == SwitchMBB) { + visitJumpTableHeader(*JT, *JTH, SwitchMBB); + JTH->Emitted = true; + } + break; + } + case CC_BitTests: { + // FIXME: Optimize away range check based on pivot comparisons. + BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex]; + + // The bit test blocks haven't been inserted yet; insert them here. + for (BitTestCase &BTC : BTB->Cases) + CurMF->insert(BBI, BTC.ThisBB); + + // Fill in fields of the BitTestBlock. + BTB->Parent = CurMBB; + BTB->Default = Fallthrough; + + BTB->DefaultProb = UnhandledProbs; + // If the cases in bit test don't form a contiguous range, we evenly + // distribute the probability on the edge to Fallthrough to two + // successors of CurMBB. + if (!BTB->ContiguousRange) { + BTB->Prob += DefaultProb / 2; + BTB->DefaultProb -= DefaultProb / 2; + } + + if (FallthroughUnreachable) { + // Skip the range check if the fallthrough block is unreachable. + BTB->OmitRangeCheck = true; + } + + // If we're in the right place, emit the bit test header right now. + if (CurMBB == SwitchMBB) { + visitBitTestHeader(*BTB, SwitchMBB); + BTB->Emitted = true; + } + break; + } + case CC_Range: { + const Value *RHS, *LHS, *MHS; + ISD::CondCode CC; + if (I->Low == I->High) { + // Check Cond == I->Low. + CC = ISD::SETEQ; + LHS = Cond; + RHS=I->Low; + MHS = nullptr; + } else { + // Check I->Low <= Cond <= I->High. + CC = ISD::SETLE; + LHS = I->Low; + MHS = Cond; + RHS = I->High; + } + + // If Fallthrough is unreachable, fold away the comparison. + if (FallthroughUnreachable) + CC = ISD::SETTRUE; + + // The false probability is the sum of all unhandled cases. 
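+      // Editorial aside (hypothetical numbers, not original source text):
+      // with three range clusters of probability 50%, 30% and 20% and a 0%
+      // default, UnhandledProbs starts at 100% and drops to 50%, 20% and 0%
+      // as each cluster is handled, so each comparison's false edge carries
+      // exactly the probability of the cases still to be tested; the
+      // CaseBlock built below records that as its false probability.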
+      CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
+                   getCurSDLoc(), I->Prob, UnhandledProbs);
+
+      if (CurMBB == SwitchMBB)
+        visitSwitchCase(CB, SwitchMBB);
+      else
+        SL->SwitchCases.push_back(CB);
+
+      break;
+    }
+    }
+    CurMBB = Fallthrough;
+  }
+}
+
+unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
+                                              CaseClusterIt First,
+                                              CaseClusterIt Last) {
+  return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
+    if (X.Prob != CC.Prob)
+      return X.Prob > CC.Prob;
+
+    // Ties are broken by comparing the case value.
+    return X.Low->getValue().slt(CC.Low->getValue());
+  });
+}
+
+void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
+                                        const SwitchWorkListItem &W,
+                                        Value *Cond,
+                                        MachineBasicBlock *SwitchMBB) {
+  assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
+         "Clusters not sorted?");
+
+  assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
+
+  // Balance the tree based on branch probabilities to create a near-optimal
+  // (in terms of search time given key frequency) binary search tree. See
+  // e.g. Kurt Mehlhorn, "Nearly Optimal Binary Search Trees" (1975).
+  CaseClusterIt LastLeft = W.FirstCluster;
+  CaseClusterIt FirstRight = W.LastCluster;
+  auto LeftProb = LastLeft->Prob + W.DefaultProb / 2;
+  auto RightProb = FirstRight->Prob + W.DefaultProb / 2;
+
+  // Move LastLeft and FirstRight towards each other from opposite directions
+  // to find a partitioning of the clusters which balances the probability on
+  // both sides. If LeftProb and RightProb are equal, alternate which side is
+  // taken to ensure 0-probability nodes are distributed evenly.
+  unsigned I = 0;
+  while (LastLeft + 1 < FirstRight) {
+    if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1)))
+      LeftProb += (++LastLeft)->Prob;
+    else
+      RightProb += (--FirstRight)->Prob;
+    I++;
+  }
+
+  while (true) {
+    // Our binary search tree differs from a typical BST in that ours can have
+    // up to three values in each leaf. The pivot selection above doesn't take
+    // that into account, which means the tree might require more nodes and be
+    // less efficient. We compensate for this here.
+
+    unsigned NumLeft = LastLeft - W.FirstCluster + 1;
+    unsigned NumRight = W.LastCluster - FirstRight + 1;
+
+    if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) {
+      // If one side has fewer than 3 clusters, and the other has more than 3,
+      // consider taking a cluster from the other side.
+
+      if (NumLeft < NumRight) {
+        // Consider moving the first cluster on the right to the left side.
+        CaseCluster &CC = *FirstRight;
+        unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
+        unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
+        if (LeftSideRank <= RightSideRank) {
+          // Moving the cluster to the left does not demote it.
+          ++LastLeft;
+          ++FirstRight;
+          continue;
+        }
+      } else {
+        assert(NumRight < NumLeft);
+        // Consider moving the last element on the left to the right side.
+        CaseCluster &CC = *LastLeft;
+        unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
+        unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
+        if (RightSideRank <= LeftSideRank) {
+          // Moving the cluster to the right does not demote it.
+          --LastLeft;
+          --FirstRight;
+          continue;
+        }
+      }
+    }
+    break;
+  }
+
+  assert(LastLeft + 1 == FirstRight);
+  assert(LastLeft >= W.FirstCluster);
+  assert(FirstRight <= W.LastCluster);
+
+  // Use the first element on the right as pivot since we will make less-than
+  // comparisons against it.
+  CaseClusterIt PivotCluster = FirstRight;
+  assert(PivotCluster > W.FirstCluster);
+  assert(PivotCluster <= W.LastCluster);
+
+  CaseClusterIt FirstLeft = W.FirstCluster;
+  CaseClusterIt LastRight = W.LastCluster;
+
+  const ConstantInt *Pivot = PivotCluster->Low;
+
+  // New blocks will be inserted immediately after the current one.
+  MachineFunction::iterator BBI(W.MBB);
+  ++BBI;
+
+  // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
+  // we can branch to its destination directly if it's squeezed exactly in
+  // between the known lower bound and Pivot - 1.
+  MachineBasicBlock *LeftMBB;
+  if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
+      FirstLeft->Low == W.GE &&
+      (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
+    LeftMBB = FirstLeft->MBB;
+  } else {
+    LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
+    FuncInfo.MF->insert(BBI, LeftMBB);
+    WorkList.push_back(
+        {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
+    // Put Cond in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(Cond);
+  }
+
+  // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
+  // single cluster, RHS.Low == Pivot, and we can branch to its destination
+  // directly if RHS.High equals the current upper bound.
+  MachineBasicBlock *RightMBB;
+  if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
+      W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
+    RightMBB = FirstRight->MBB;
+  } else {
+    RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
+    FuncInfo.MF->insert(BBI, RightMBB);
+    WorkList.push_back(
+        {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
+    // Put Cond in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(Cond);
+  }
+
+  // Create the CaseBlock record that will be used to lower the branch.
+  CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
+               getCurSDLoc(), LeftProb, RightProb);
+
+  if (W.MBB == SwitchMBB)
+    visitSwitchCase(CB, SwitchMBB);
+  else
+    SL->SwitchCases.push_back(CB);
+}
+
+// Scale CaseProb after peeling a case with probability PeeledCaseProb from
+// the switch statement.
+static BranchProbability scaleCaseProbality(BranchProbability CaseProb,
+                                            BranchProbability PeeledCaseProb) {
+  if (PeeledCaseProb == BranchProbability::getOne())
+    return BranchProbability::getZero();
+  BranchProbability SwitchProb = PeeledCaseProb.getCompl();
+
+  uint32_t Numerator = CaseProb.getNumerator();
+  uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
+  return BranchProbability(Numerator, std::max(Numerator, Denominator));
+}
+
+// Try to peel the top probability case if it exceeds the threshold.
+// Return the current MachineBasicBlock for the switch statement if peeling
+// does not occur.
+// If peeling is performed, return the newly created MachineBasicBlock
+// for the peeled switch statement. Also update Clusters to remove the peeled
+// case. PeeledCaseProb is the BranchProbability for the peeled case.
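+// Editorial aside (hypothetical numbers, not original source text): if a
+// case with probability 60% is peeled, scaleCaseProbality above rescales
+// each surviving cluster against the remaining 40% mass, e.g. a 20% cluster
+// becomes 0.2 / 0.4 = 50%, so the leftover probabilities again sum to ~1.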
+MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
+    const SwitchInst &SI, CaseClusterVector &Clusters,
+    BranchProbability &PeeledCaseProb) {
+  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+  // Don't perform the peeling if there is only one cluster or when optimizing
+  // for size.
+  if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
+      TM.getOptLevel() == CodeGenOpt::None ||
+      SwitchMBB->getParent()->getFunction().hasMinSize())
+    return SwitchMBB;
+
+  BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
+  unsigned PeeledCaseIndex = 0;
+  bool SwitchPeeled = false;
+  for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
+    CaseCluster &CC = Clusters[Index];
+    if (CC.Prob < TopCaseProb)
+      continue;
+    TopCaseProb = CC.Prob;
+    PeeledCaseIndex = Index;
+    SwitchPeeled = true;
+  }
+  if (!SwitchPeeled)
+    return SwitchMBB;
+
+  LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: "
+                    << TopCaseProb << "\n");
+
+  // Record the MBB for the peeled switch statement.
+  MachineFunction::iterator BBI(SwitchMBB);
+  ++BBI;
+  MachineBasicBlock *PeeledSwitchMBB =
+      FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
+  FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
+
+  ExportFromCurrentBlock(SI.getCondition());
+  auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
+  SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
+                          nullptr, nullptr, TopCaseProb.getCompl()};
+  lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
+
+  Clusters.erase(PeeledCaseIt);
+  for (CaseCluster &CC : Clusters) {
+    LLVM_DEBUG(
+        dbgs() << "Scale the probability for one cluster, before scaling: "
+               << CC.Prob << "\n");
+    CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
+    LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
+  }
+  PeeledCaseProb = TopCaseProb;
+  return PeeledSwitchMBB;
+}
+
+void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
+  // Extract cases from the switch.
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
+  CaseClusterVector Clusters;
+  Clusters.reserve(SI.getNumCases());
+  for (auto I : SI.cases()) {
+    MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
+    const ConstantInt *CaseVal = I.getCaseValue();
+    BranchProbability Prob =
+        BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
+            : BranchProbability(1, SI.getNumCases() + 1);
+    Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
+  }
+
+  MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+  // Cluster adjacent cases with the same destination. We do this at all
+  // optimization levels because it's cheap to do and will make codegen faster
+  // if there are many clusters.
+  sortAndRangeify(Clusters);
+
+  // The branch probability of the peeled case.
+  BranchProbability PeeledCaseProb = BranchProbability::getZero();
+  MachineBasicBlock *PeeledSwitchMBB =
+      peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
+
+  // If there is only the default destination, jump there directly.
+  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+  if (Clusters.empty()) {
+    assert(PeeledSwitchMBB == SwitchMBB);
+    SwitchMBB->addSuccessor(DefaultMBB);
+    if (DefaultMBB != NextBlock(SwitchMBB)) {
+      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+                              getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
+    }
+    return;
+  }
+
+  SL->findJumpTables(Clusters, &SI, DefaultMBB);
+  SL->findBitTestClusters(Clusters, &SI);
+
+  LLVM_DEBUG({
+    dbgs() << "Case clusters: ";
+    for (const CaseCluster &C : Clusters) {
+      if (C.Kind == CC_JumpTable)
+        dbgs() << "JT:";
+      if (C.Kind == CC_BitTests)
+        dbgs() << "BT:";
+
+      C.Low->getValue().print(dbgs(), true);
+      if (C.Low != C.High) {
+        dbgs() << '-';
+        C.High->getValue().print(dbgs(), true);
+      }
+      dbgs() << ' ';
+    }
+    dbgs() << '\n';
+  });
+
+  assert(!Clusters.empty());
+  SwitchWorkList WorkList;
+  CaseClusterIt First = Clusters.begin();
+  CaseClusterIt Last = Clusters.end() - 1;
+  auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
+  // Scale the branch probability for DefaultMBB if peeling occurred and
+  // DefaultMBB was not replaced.
+  if (PeeledCaseProb != BranchProbability::getZero() &&
+      DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
+    DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb);
+  WorkList.push_back(
+      {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
+
+  while (!WorkList.empty()) {
+    SwitchWorkListItem W = WorkList.back();
+    WorkList.pop_back();
+    unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
+
+    if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
+        !DefaultMBB->getParent()->getFunction().hasMinSize()) {
+      // For optimized builds, lower a large range as a balanced binary tree.
+      splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
+      continue;
+    }
+
+    lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
+  }
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
new file mode 100644
index 0000000000000..bfcf30b430b6d
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -0,0 +1,892 @@
+//===- SelectionDAGBuilder.h - Selection-DAG building -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H + +#include "StatepointLowering.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/SwitchLoweringUtils.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <utility> +#include <vector> + +namespace llvm { + +class AllocaInst; +class AtomicCmpXchgInst; +class AtomicRMWInst; +class BasicBlock; +class BranchInst; +class CallInst; +class CallBrInst; +class CatchPadInst; +class CatchReturnInst; +class CatchSwitchInst; +class CleanupPadInst; +class CleanupReturnInst; +class Constant; +class ConstantInt; +class ConstrainedFPIntrinsic; +class DbgValueInst; +class DataLayout; +class DIExpression; +class DILocalVariable; +class DILocation; +class FenceInst; +class FunctionLoweringInfo; +class GCFunctionInfo; +class GCRelocateInst; +class GCResultInst; +class IndirectBrInst; +class InvokeInst; +class LandingPadInst; +class LLVMContext; +class LoadInst; +class MachineBasicBlock; +class PHINode; +class ResumeInst; +class ReturnInst; +class SDDbgValue; +class StoreInst; +class SwiftErrorValueTracking; +class SwitchInst; +class TargetLibraryInfo; +class TargetMachine; +class Type; +class VAArgInst; +class UnreachableInst; +class Use; +class User; +class Value; + +//===----------------------------------------------------------------------===// +/// SelectionDAGBuilder - This is the common target-independent lowering +/// implementation that is parameterized by a TargetLowering object. +/// +class SelectionDAGBuilder { + /// The current instruction being visited. + const Instruction *CurInst = nullptr; + + DenseMap<const Value*, SDValue> NodeMap; + + /// Maps argument value for unused arguments. This is used + /// to preserve debug information for incoming arguments. + DenseMap<const Value*, SDValue> UnusedArgNodeMap; + + /// Helper type for DanglingDebugInfoMap. + class DanglingDebugInfo { + const DbgValueInst* DI = nullptr; + DebugLoc dl; + unsigned SDNodeOrder = 0; + + public: + DanglingDebugInfo() = default; + DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) + : DI(di), dl(std::move(DL)), SDNodeOrder(SDNO) {} + + const DbgValueInst* getDI() { return DI; } + DebugLoc getdl() { return dl; } + unsigned getSDNodeOrder() { return SDNodeOrder; } + }; + + /// Helper type for DanglingDebugInfoMap. + typedef std::vector<DanglingDebugInfo> DanglingDebugInfoVector; + + /// Keeps track of dbg_values for which we have not yet seen the referent. + /// We defer handling these until we do see it. + MapVector<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap; + +public: + /// Loads are not emitted to the program immediately. We bunch them up and + /// then emit token factor nodes when possible. 
This allows us to get simple
+  /// disambiguation between loads without worrying about alias analysis.
+  SmallVector<SDValue, 8> PendingLoads;
+
+  /// State used while lowering a statepoint sequence (gc_statepoint,
+  /// gc_relocate, and gc_result). See StatepointLowering.hpp/cpp for details.
+  StatepointLoweringState StatepointLowering;
+
+private:
+  /// CopyToReg nodes that copy values to virtual registers for export to other
+  /// blocks need to be emitted before any terminator instruction, but they
+  /// have no other ordering requirements. We bunch them up and then emit a
+  /// single token factor for them just before terminator instructions.
+  SmallVector<SDValue, 8> PendingExports;
+
+  /// A unique monotonically increasing number used to order the SDNodes we
+  /// create.
+  unsigned SDNodeOrder;
+
+  /// Determine the rank by weight of CC in [First,Last]. If CC has more weight
+  /// than each cluster in the range, its rank is 0.
+  unsigned caseClusterRank(const SwitchCG::CaseCluster &CC,
+                           SwitchCG::CaseClusterIt First,
+                           SwitchCG::CaseClusterIt Last);
+
+  /// Emit comparison and split W into two subtrees.
+  void splitWorkItem(SwitchCG::SwitchWorkList &WorkList,
+                     const SwitchCG::SwitchWorkListItem &W, Value *Cond,
+                     MachineBasicBlock *SwitchMBB);
+
+  /// Lower W.
+  void lowerWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond,
+                     MachineBasicBlock *SwitchMBB,
+                     MachineBasicBlock *DefaultMBB);
+
+  /// Peel the top probability case if it exceeds the threshold.
+  MachineBasicBlock *
+  peelDominantCaseCluster(const SwitchInst &SI,
+                          SwitchCG::CaseClusterVector &Clusters,
+                          BranchProbability &PeeledCaseProb);
+
+  /// A class which encapsulates all of the information needed to generate a
+  /// stack protector check and signals to isel via its state being initialized
+  /// that a stack protector needs to be generated.
+  ///
+  /// *NOTE* The following is a high level documentation of SelectionDAG Stack
+  /// Protector Generation. The reason that it is placed here is for a lack of
+  /// other good places to stick it.
+  ///
+  /// High Level Overview of SelectionDAG Stack Protector Generation:
+  ///
+  /// Previously, generation of stack protectors was done exclusively in the
+  /// pre-SelectionDAG codegen LLVM IR pass "Stack Protector". This
+  /// necessitated splitting basic blocks at the IR level to create the
+  /// success/failure basic blocks in the tail of the basic block in question.
+  /// As a result of this, calls that would have qualified for the sibling
+  /// call optimization were no longer eligible for it, since said calls were
+  /// no longer right in the "tail position" (i.e. the immediate predecessor
+  /// of a ReturnInst instruction).
+  ///
+  /// Then it was noticed that since the sibling call optimization causes the
+  /// callee to reuse the caller's stack, if we could delay the generation of
+  /// the stack protector check until later in CodeGen after the sibling call
+  /// decision was made, we would get both the tail call optimization and the
+  /// stack protector check!
+  ///
+  /// A few goals in solving this problem were:
+  ///
+  ///   1. Preserve the architecture independence of stack protector
+  ///      generation.
+  ///
+  ///   2. Preserve the normal IR level stack protector check for platforms
+  ///      like OpenBSD for which we support platform-specific stack protector
+  ///      generation.
+  ///
+  /// The main problem that guided the present solution is that one cannot
+  /// solve this problem in an architecture independent manner at the IR level
+  /// only.
This is because:
+  ///
+  ///   1. The decision on whether or not to perform a sibling call on certain
+  ///      platforms (for instance i386) requires lower level information
+  ///      related to available registers that cannot be known at the IR
+  ///      level.
+  ///
+  ///   2. Even if the previous point were not true, the decision on whether
+  ///      to perform a tail call is done in LowerCallTo in SelectionDAG,
+  ///      which occurs after the Stack Protector Pass. As a result, one would
+  ///      need to put the relevant callinst into the stack protector check
+  ///      success basic block (where the return inst is placed) and then move
+  ///      it back later at SelectionDAG/MI time before the stack protector
+  ///      check if the tail call optimization failed. The MI level option was
+  ///      nixed immediately since it would require platform-specific pattern
+  ///      matching. The SelectionDAG level option was nixed because
+  ///      SelectionDAG only processes one IR level basic block at a time,
+  ///      implying one could not create a DAG Combine to move the callinst.
+  ///
+  /// To get around this problem a few things were realized:
+  ///
+  ///   1. While one cannot handle multiple IR level basic blocks at the
+  ///      SelectionDAG level, one can generate multiple machine basic blocks
+  ///      for one IR level basic block. This is how we handle bit tests and
+  ///      switches.
+  ///
+  ///   2. At the MI level, tail calls are represented via a special return
+  ///      MIInst called "tcreturn". Thus if we know the basic block in which
+  ///      we wish to insert the stack protector check, we get the correct
+  ///      behavior by always inserting the stack protector check right before
+  ///      the return statement. This is a "magical transformation" since no
+  ///      matter where the stack protector check intrinsic is, we always
+  ///      insert the stack protector check code at the end of the BB.
+  ///
+  /// Given the aforementioned constraints, the following solution was
+  /// devised:
+  ///
+  ///   1. On platforms that do not support SelectionDAG stack protector check
+  ///      generation, allow for the normal IR level stack protector check
+  ///      generation to continue.
+  ///
+  ///   2. On platforms that do support SelectionDAG stack protector check
+  ///      generation:
+  ///
+  ///     a. Use the IR level stack protector pass to decide if a stack
+  ///        protector is required/which BB we insert the stack protector
+  ///        check in by reusing the logic already therein. If we wish to
+  ///        generate a stack protector check in a basic block, we place a
+  ///        special IR intrinsic called llvm.stackprotectorcheck right before
+  ///        the BB's returninst, or if there is a callinst that could
+  ///        potentially be sibling call optimized, before the call inst.
+  ///
+  ///     b. Then when a BB with said intrinsic is processed, we codegen the
+  ///        BB normally via SelectBasicBlock. In said process, when we visit
+  ///        the stack protector check, we do not actually emit anything into
+  ///        the BB. Instead, we just initialize the stack protector
+  ///        descriptor class (which involves stashing information/creating
+  ///        the success mbb and the failure mbb if we have not created one
+  ///        for this function yet) and export the guard variable that we are
+  ///        going to compare.
+  ///
+  ///     c.
After we finish selecting the basic block, in FinishBasicBlock if + /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is + /// initialized, we produce the validation code with one of these + /// techniques: + /// 1) with a call to a guard check function + /// 2) with inlined instrumentation + /// + /// 1) We insert a call to the check function before the terminator. + /// + /// 2) We first find a splice point in the parent basic block + /// before the terminator and then splice the terminator of said basic + /// block into the success basic block. Then we code-gen a new tail for + /// the parent basic block consisting of the two loads, the comparison, + /// and finally two branches to the success/failure basic blocks. We + /// conclude by code-gening the failure basic block if we have not + /// code-gened it already (all stack protector checks we generate in + /// the same function, use the same failure basic block). + class StackProtectorDescriptor { + public: + StackProtectorDescriptor() = default; + + /// Returns true if all fields of the stack protector descriptor are + /// initialized implying that we should/are ready to emit a stack protector. + bool shouldEmitStackProtector() const { + return ParentMBB && SuccessMBB && FailureMBB; + } + + bool shouldEmitFunctionBasedCheckStackProtector() const { + return ParentMBB && !SuccessMBB && !FailureMBB; + } + + /// Initialize the stack protector descriptor structure for a new basic + /// block. + void initialize(const BasicBlock *BB, MachineBasicBlock *MBB, + bool FunctionBasedInstrumentation) { + // Make sure we are not initialized yet. + assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is " + "already initialized!"); + ParentMBB = MBB; + if (!FunctionBasedInstrumentation) { + SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true); + FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB); + } + } + + /// Reset state that changes when we handle different basic blocks. + /// + /// This currently includes: + /// + /// 1. The specific basic block we are generating a + /// stack protector for (ParentMBB). + /// + /// 2. The successor machine basic block that will contain the tail of + /// parent mbb after we create the stack protector check (SuccessMBB). This + /// BB is visited only on stack protector check success. + void resetPerBBState() { + ParentMBB = nullptr; + SuccessMBB = nullptr; + } + + /// Reset state that only changes when we switch functions. + /// + /// This currently includes: + /// + /// 1. FailureMBB since we reuse the failure code path for all stack + /// protector checks created in an individual function. + /// + /// 2.The guard variable since the guard variable we are checking against is + /// always the same. + void resetPerFunctionState() { + FailureMBB = nullptr; + } + + MachineBasicBlock *getParentMBB() { return ParentMBB; } + MachineBasicBlock *getSuccessMBB() { return SuccessMBB; } + MachineBasicBlock *getFailureMBB() { return FailureMBB; } + + private: + /// The basic block for which we are generating the stack protector. + /// + /// As a result of stack protector generation, we will splice the + /// terminators of this basic block into the successor mbb SuccessMBB and + /// replace it with a compare/branch to the successor mbbs + /// SuccessMBB/FailureMBB depending on whether or not the stack protector + /// was violated. 
+ MachineBasicBlock *ParentMBB = nullptr; + + /// A basic block visited on stack protector check success that contains the + /// terminators of ParentMBB. + MachineBasicBlock *SuccessMBB = nullptr; + + /// This basic block visited on stack protector check failure that will + /// contain a call to __stack_chk_fail(). + MachineBasicBlock *FailureMBB = nullptr; + + /// Add a successor machine basic block to ParentMBB. If the successor mbb + /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic + /// block will be created. Assign a large weight if IsLikely is true. + MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB, + MachineBasicBlock *ParentMBB, + bool IsLikely, + MachineBasicBlock *SuccMBB = nullptr); + }; + +private: + const TargetMachine &TM; + +public: + /// Lowest valid SDNodeOrder. The special case 0 is reserved for scheduling + /// nodes without a corresponding SDNode. + static const unsigned LowestSDNodeOrder = 1; + + SelectionDAG &DAG; + const DataLayout *DL = nullptr; + AliasAnalysis *AA = nullptr; + const TargetLibraryInfo *LibInfo; + + class SDAGSwitchLowering : public SwitchCG::SwitchLowering { + public: + SDAGSwitchLowering(SelectionDAGBuilder *sdb, FunctionLoweringInfo &funcinfo) + : SwitchCG::SwitchLowering(funcinfo), SDB(sdb) {} + + virtual void addSuccessorWithProb( + MachineBasicBlock *Src, MachineBasicBlock *Dst, + BranchProbability Prob = BranchProbability::getUnknown()) override { + SDB->addSuccessorWithProb(Src, Dst, Prob); + } + + private: + SelectionDAGBuilder *SDB; + }; + + std::unique_ptr<SDAGSwitchLowering> SL; + + /// A StackProtectorDescriptor structure used to communicate stack protector + /// information in between SelectBasicBlock and FinishBasicBlock. + StackProtectorDescriptor SPDescriptor; + + // Emit PHI-node-operand constants only once even if used by multiple + // PHI nodes. + DenseMap<const Constant *, unsigned> ConstantsOut; + + /// Information about the function as a whole. + FunctionLoweringInfo &FuncInfo; + + /// Information about the swifterror values used throughout the function. + SwiftErrorValueTracking &SwiftError; + + /// Garbage collection metadata for the function. + GCFunctionInfo *GFI; + + /// Map a landing pad to the call site indexes. + DenseMap<MachineBasicBlock *, SmallVector<unsigned, 4>> LPadToCallSiteMap; + + /// This is set to true if a call in the current block has been translated as + /// a tail call. In this case, no subsequent DAG nodes should be created. + bool HasTailCall = false; + + LLVMContext *Context; + + SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, + SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol) + : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag), + SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo), + SwiftError(swifterror) {} + + void init(GCFunctionInfo *gfi, AliasAnalysis *AA, + const TargetLibraryInfo *li); + + /// Clear out the current SelectionDAG and the associated state and prepare + /// this SelectionDAGBuilder object to be used for a new block. This doesn't + /// clear out information about additional blocks that are needed to complete + /// switch lowering or PHI node updating; that information is cleared out as + /// it is consumed. + void clear(); + + /// Clear the dangling debug information map. This function is separated from + /// the clear so that debug information that is dangling in a basic block can + /// be properly resolved in a different basic block. 
This allows the + /// SelectionDAG to resolve dangling debug information attached to PHI nodes. + void clearDanglingDebugInfo(); + + /// Return the current virtual root of the Selection DAG, flushing any + /// PendingLoad items. This must be done before emitting a store or any other + /// node that may need to be ordered after any prior load instructions. + SDValue getRoot(); + + /// Similar to getRoot, but instead of flushing all the PendingLoad items, + /// flush all the PendingExports items. It is necessary to do this before + /// emitting a terminator instruction. + SDValue getControlRoot(); + + SDLoc getCurSDLoc() const { + return SDLoc(CurInst, SDNodeOrder); + } + + DebugLoc getCurDebugLoc() const { + return CurInst ? CurInst->getDebugLoc() : DebugLoc(); + } + + void CopyValueToVirtualRegister(const Value *V, unsigned Reg); + + void visit(const Instruction &I); + + void visit(unsigned Opcode, const User &I); + + /// If there was virtual register allocated for the value V emit CopyFromReg + /// of the specified type Ty. Return empty SDValue() otherwise. + SDValue getCopyFromRegs(const Value *V, Type *Ty); + + /// If we have dangling debug info that describes \p Variable, or an + /// overlapping part of variable considering the \p Expr, then this method + /// will drop that debug info as it isn't valid any longer. + void dropDanglingDebugInfo(const DILocalVariable *Variable, + const DIExpression *Expr); + + /// If we saw an earlier dbg_value referring to V, generate the debug data + /// structures now that we've seen its definition. + void resolveDanglingDebugInfo(const Value *V, SDValue Val); + + /// For the given dangling debuginfo record, perform last-ditch efforts to + /// resolve the debuginfo to something that is represented in this DAG. If + /// this cannot be done, produce an Undef debug value record. + void salvageUnresolvedDbgValue(DanglingDebugInfo &DDI); + + /// For a given Value, attempt to create and record a SDDbgValue in the + /// SelectionDAG. + bool handleDebugValue(const Value *V, DILocalVariable *Var, + DIExpression *Expr, DebugLoc CurDL, + DebugLoc InstDL, unsigned Order); + + /// Evict any dangling debug information, attempting to salvage it first. + void resolveOrClearDbgInfo(); + + SDValue getValue(const Value *V); + bool findValue(const Value *V) const; + + /// Return the SDNode for the specified IR value if it exists. 
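+  /// A hedged usage sketch (hypothetical caller, not original source text):
+  ///   if (SDNode *N = getNodeForIRValue(V))
+  ///     N->dump(); // only dereference when a node was recorded for V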
+ SDNode *getNodeForIRValue(const Value *V) { + if (NodeMap.find(V) == NodeMap.end()) + return nullptr; + return NodeMap[V].getNode(); + } + + SDValue getNonRegisterValue(const Value *V); + SDValue getValueImpl(const Value *V); + + void setValue(const Value *V, SDValue NewN) { + SDValue &N = NodeMap[V]; + assert(!N.getNode() && "Already set a value for this node!"); + N = NewN; + } + + void setUnusedArgValue(const Value *V, SDValue NewN) { + SDValue &N = UnusedArgNodeMap[V]; + assert(!N.getNode() && "Already set a value for this node!"); + N = NewN; + } + + void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB, + Instruction::BinaryOps Opc, BranchProbability TProb, + BranchProbability FProb, bool InvertCond); + void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB, + BranchProbability TProb, BranchProbability FProb, + bool InvertCond); + bool ShouldEmitAsBranches(const std::vector<SwitchCG::CaseBlock> &Cases); + bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); + void CopyToExportRegsIfNeeded(const Value *V); + void ExportFromCurrentBlock(const Value *V); + void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, + const BasicBlock *EHPadBB = nullptr); + + // Lower range metadata from 0 to N to assert zext to an integer of nearest + // floor power of two. + SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I, + SDValue Op); + + void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI, + const CallBase *Call, unsigned ArgIdx, + unsigned NumArgs, SDValue Callee, + Type *ReturnTy, bool IsPatchPoint); + + std::pair<SDValue, SDValue> + lowerInvokable(TargetLowering::CallLoweringInfo &CLI, + const BasicBlock *EHPadBB = nullptr); + + /// When an MBB was split during scheduling, update the + /// references that need to refer to the last resulting block. + void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); + + /// Describes a gc.statepoint or a gc.statepoint like thing for the purposes + /// of lowering into a STATEPOINT node. + struct StatepointLoweringInfo { + /// Bases[i] is the base pointer for Ptrs[i]. Together they denote the set + /// of gc pointers this STATEPOINT has to relocate. + SmallVector<const Value *, 16> Bases; + SmallVector<const Value *, 16> Ptrs; + + /// The set of gc.relocate calls associated with this gc.statepoint. + SmallVector<const GCRelocateInst *, 16> GCRelocates; + + /// The full list of gc arguments to the gc.statepoint being lowered. + ArrayRef<const Use> GCArgs; + + /// The gc.statepoint instruction. + const Instruction *StatepointInstr = nullptr; + + /// The list of gc transition arguments present in the gc.statepoint being + /// lowered. + ArrayRef<const Use> GCTransitionArgs; + + /// The ID that the resulting STATEPOINT instruction has to report. + unsigned ID = -1; + + /// Information regarding the underlying call instruction. + TargetLowering::CallLoweringInfo CLI; + + /// The deoptimization state associated with this gc.statepoint call, if + /// any. + ArrayRef<const Use> DeoptState; + + /// Flags associated with the meta arguments being lowered. + uint64_t StatepointFlags = -1; + + /// The number of patchable bytes the call needs to get lowered into. 
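+    /// (Editorial note: as with ID and StatepointFlags above, the -1
+    /// initializer below wraps to the unsigned type's all-ones value and
+    /// acts as a "not yet set" sentinel.)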
+ unsigned NumPatchBytes = -1; + + /// The exception handling unwind destination, in case this represents an + /// invoke of gc.statepoint. + const BasicBlock *EHPadBB = nullptr; + + explicit StatepointLoweringInfo(SelectionDAG &DAG) : CLI(DAG) {} + }; + + /// Lower \p SLI into a STATEPOINT instruction. + SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SI); + + // This function is responsible for the whole statepoint lowering process. + // It uniformly handles invoke and call statepoints. + void LowerStatepoint(ImmutableStatepoint ISP, + const BasicBlock *EHPadBB = nullptr); + + void LowerCallSiteWithDeoptBundle(const CallBase *Call, SDValue Callee, + const BasicBlock *EHPadBB); + + void LowerDeoptimizeCall(const CallInst *CI); + void LowerDeoptimizingReturn(); + + void LowerCallSiteWithDeoptBundleImpl(const CallBase *Call, SDValue Callee, + const BasicBlock *EHPadBB, + bool VarArgDisallowed, + bool ForceVoidReturnTy); + + /// Returns the type of FrameIndex and TargetFrameIndex nodes. + MVT getFrameIndexTy() { + return DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()); + } + +private: + // Terminator instructions. + void visitRet(const ReturnInst &I); + void visitBr(const BranchInst &I); + void visitSwitch(const SwitchInst &I); + void visitIndirectBr(const IndirectBrInst &I); + void visitUnreachable(const UnreachableInst &I); + void visitCleanupRet(const CleanupReturnInst &I); + void visitCatchSwitch(const CatchSwitchInst &I); + void visitCatchRet(const CatchReturnInst &I); + void visitCatchPad(const CatchPadInst &I); + void visitCleanupPad(const CleanupPadInst &CPI); + + BranchProbability getEdgeProbability(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const; + void addSuccessorWithProb( + MachineBasicBlock *Src, MachineBasicBlock *Dst, + BranchProbability Prob = BranchProbability::getUnknown()); + +public: + void visitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB); + void visitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB); + void visitSPDescriptorFailure(StackProtectorDescriptor &SPD); + void visitBitTestHeader(SwitchCG::BitTestBlock &B, + MachineBasicBlock *SwitchBB); + void visitBitTestCase(SwitchCG::BitTestBlock &BB, MachineBasicBlock *NextMBB, + BranchProbability BranchProbToNext, unsigned Reg, + SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB); + void visitJumpTable(SwitchCG::JumpTable &JT); + void visitJumpTableHeader(SwitchCG::JumpTable &JT, + SwitchCG::JumpTableHeader &JTH, + MachineBasicBlock *SwitchBB); + +private: + // These all get lowered before this pass. 
+ void visitInvoke(const InvokeInst &I); + void visitCallBr(const CallBrInst &I); + void visitResume(const ResumeInst &I); + + void visitUnary(const User &I, unsigned Opcode); + void visitFNeg(const User &I) { visitUnary(I, ISD::FNEG); } + + void visitBinary(const User &I, unsigned Opcode); + void visitShift(const User &I, unsigned Opcode); + void visitAdd(const User &I) { visitBinary(I, ISD::ADD); } + void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); } + void visitSub(const User &I) { visitBinary(I, ISD::SUB); } + void visitFSub(const User &I); + void visitMul(const User &I) { visitBinary(I, ISD::MUL); } + void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); } + void visitURem(const User &I) { visitBinary(I, ISD::UREM); } + void visitSRem(const User &I) { visitBinary(I, ISD::SREM); } + void visitFRem(const User &I) { visitBinary(I, ISD::FREM); } + void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); } + void visitSDiv(const User &I); + void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); } + void visitAnd (const User &I) { visitBinary(I, ISD::AND); } + void visitOr (const User &I) { visitBinary(I, ISD::OR); } + void visitXor (const User &I) { visitBinary(I, ISD::XOR); } + void visitShl (const User &I) { visitShift(I, ISD::SHL); } + void visitLShr(const User &I) { visitShift(I, ISD::SRL); } + void visitAShr(const User &I) { visitShift(I, ISD::SRA); } + void visitICmp(const User &I); + void visitFCmp(const User &I); + // Visit the conversion instructions + void visitTrunc(const User &I); + void visitZExt(const User &I); + void visitSExt(const User &I); + void visitFPTrunc(const User &I); + void visitFPExt(const User &I); + void visitFPToUI(const User &I); + void visitFPToSI(const User &I); + void visitUIToFP(const User &I); + void visitSIToFP(const User &I); + void visitPtrToInt(const User &I); + void visitIntToPtr(const User &I); + void visitBitCast(const User &I); + void visitAddrSpaceCast(const User &I); + + void visitExtractElement(const User &I); + void visitInsertElement(const User &I); + void visitShuffleVector(const User &I); + + void visitExtractValue(const User &I); + void visitInsertValue(const User &I); + void visitLandingPad(const LandingPadInst &LP); + + void visitGetElementPtr(const User &I); + void visitSelect(const User &I); + + void visitAlloca(const AllocaInst &I); + void visitLoad(const LoadInst &I); + void visitStore(const StoreInst &I); + void visitMaskedLoad(const CallInst &I, bool IsExpanding = false); + void visitMaskedStore(const CallInst &I, bool IsCompressing = false); + void visitMaskedGather(const CallInst &I); + void visitMaskedScatter(const CallInst &I); + void visitAtomicCmpXchg(const AtomicCmpXchgInst &I); + void visitAtomicRMW(const AtomicRMWInst &I); + void visitFence(const FenceInst &I); + void visitPHI(const PHINode &I); + void visitCall(const CallInst &I); + bool visitMemCmpCall(const CallInst &I); + bool visitMemPCpyCall(const CallInst &I); + bool visitMemChrCall(const CallInst &I); + bool visitStrCpyCall(const CallInst &I, bool isStpcpy); + bool visitStrCmpCall(const CallInst &I); + bool visitStrLenCall(const CallInst &I); + bool visitStrNLenCall(const CallInst &I); + bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); + bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode); + void visitAtomicLoad(const LoadInst &I); + void visitAtomicStore(const StoreInst &I); + void visitLoadFromSwiftError(const LoadInst &I); + void visitStoreToSwiftError(const StoreInst &I); + + void visitInlineAsm(ImmutableCallSite 
CS); + void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); + void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); + void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); + + void visitVAStart(const CallInst &I); + void visitVAArg(const VAArgInst &I); + void visitVAEnd(const CallInst &I); + void visitVACopy(const CallInst &I); + void visitStackmap(const CallInst &I); + void visitPatchpoint(ImmutableCallSite CS, + const BasicBlock *EHPadBB = nullptr); + + // These two are implemented in StatepointLowering.cpp + void visitGCRelocate(const GCRelocateInst &Relocate); + void visitGCResult(const GCResultInst &I); + + void visitVectorReduce(const CallInst &I, unsigned Intrinsic); + + void visitUserOp1(const Instruction &I) { + llvm_unreachable("UserOp1 should not exist at instruction selection time!"); + } + void visitUserOp2(const Instruction &I) { + llvm_unreachable("UserOp2 should not exist at instruction selection time!"); + } + + void processIntegerCallValue(const Instruction &I, + SDValue Value, bool IsSigned); + + void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); + + void emitInlineAsmError(ImmutableCallSite CS, const Twine &Message); + + /// If V is a function argument then create corresponding DBG_VALUE machine + /// instruction for it now. At the end of instruction selection, they will be + /// inserted into the entry BB. + bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable, + DIExpression *Expr, DILocation *DL, + bool IsDbgDeclare, const SDValue &N); + + /// Return the next block after MBB, or nullptr if there is none. + MachineBasicBlock *NextBlock(MachineBasicBlock *MBB); + + /// Update the DAG and DAG builder with the relevant information after + /// a new root node has been created which could be a tail call. + void updateDAGForMaybeTailCall(SDValue MaybeTC); + + /// Return the appropriate SDDbgValue based on N. + SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable, + DIExpression *Expr, const DebugLoc &dl, + unsigned DbgSDNodeOrder); + + /// Lowers CallInst to an external symbol. + void lowerCallToExternalSymbol(const CallInst &I, const char *FunctionName); +}; + +/// This struct represents the registers (physical or virtual) +/// that a particular set of values is assigned, and the type information about +/// the value. The most common situation is to represent one value at a time, +/// but struct or array values are handled element-wise as multiple values. The +/// splitting of aggregates is performed recursively, so that we never have +/// aggregate-typed registers. The values at this point do not necessarily have +/// legal types, so each value may require one or more registers of some legal +/// type. +/// +struct RegsForValue { + /// The value types of the values, which may not be legal, and +/// may need to be promoted or synthesized from one or more registers. + SmallVector<EVT, 4> ValueVTs; + + /// The value types of the registers. This is the same size as ValueVTs and it + /// records, for each value, what the type of the assigned register or + /// registers is. (Individual values are never synthesized from more than one + /// type of register.) + /// + /// With virtual registers, the contents of RegVTs is redundant with TLI's + /// getRegisterType member function; however, with physical registers + /// it is necessary to have a separate record of the types. + SmallVector<MVT, 4> RegVTs; + + /// This list holds the registers assigned to the values.
+ /// Each legal or promoted value requires one register, and each + /// expanded value requires multiple registers. + SmallVector<unsigned, 4> Regs; + + /// This list holds the number of registers for each value. + SmallVector<unsigned, 4> RegCount; + + /// Records if this value needs to be treated in an ABI-dependent manner, + /// different from normal type legalization. + Optional<CallingConv::ID> CallConv; + + RegsForValue() = default; + RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt, + Optional<CallingConv::ID> CC = None); + RegsForValue(LLVMContext &Context, const TargetLowering &TLI, + const DataLayout &DL, unsigned Reg, Type *Ty, + Optional<CallingConv::ID> CC); + + bool isABIMangled() const { + return CallConv.hasValue(); + } + + /// Add the specified values to this one. + void append(const RegsForValue &RHS) { + ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); + RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); + Regs.append(RHS.Regs.begin(), RHS.Regs.end()); + RegCount.push_back(RHS.Regs.size()); + } + + /// Emit a series of CopyFromReg nodes that copy from this value and return + /// the result as a ValueVTs value. This uses Chain/Flag as the input and + /// updates them for the output Chain/Flag. If the Flag pointer is nullptr, no + /// flag is used. + SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, + const SDLoc &dl, SDValue &Chain, SDValue *Flag, + const Value *V = nullptr) const; + + /// Emit a series of CopyToReg nodes that copy the specified value into the + /// registers specified by this object. This uses Chain/Flag as the input and + /// updates them for the output Chain/Flag. If the Flag pointer is nullptr, no + /// flag is used. If V is not nullptr, then it is used in printing better + /// diagnostic messages on error. + void getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl, + SDValue &Chain, SDValue *Flag, const Value *V = nullptr, + ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; + + /// Add this value to the specified inlineasm node operand list. This adds the + /// code marker, matching input operand index (if applicable), and includes + /// the number of values added into it. + void AddInlineAsmOperands(unsigned Code, bool HasMatching, + unsigned MatchingIdx, const SDLoc &dl, + SelectionDAG &DAG, std::vector<SDValue> &Ops) const; + + /// Check if the total RegCount is greater than one. + bool occupiesMultipleRegs() const { + return std::accumulate(RegCount.begin(), RegCount.end(), 0) > 1; + } + + /// Return a list of registers and their sizes. + SmallVector<std::pair<unsigned, unsigned>, 4> getRegsAndSizes() const; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp new file mode 100644 index 0000000000000..bc10f76212394 --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -0,0 +1,963 @@ +//===- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::dump method and friends.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" +#include "llvm/Support/Printable.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "SDNodeDbgValue.h" +#include <cstdint> +#include <iterator> + +using namespace llvm; + +static cl::opt<bool> +VerboseDAGDumping("dag-dump-verbose", cl::Hidden, + cl::desc("Display more information when dumping selection " + "DAG nodes.")); + +std::string SDNode::getOperationName(const SelectionDAG *G) const { + switch (getOpcode()) { + default: + if (getOpcode() < ISD::BUILTIN_OP_END) + return "<<Unknown DAG Node>>"; + if (isMachineOpcode()) { + if (G) + if (const TargetInstrInfo *TII = G->getSubtarget().getInstrInfo()) + if (getMachineOpcode() < TII->getNumOpcodes()) + return TII->getName(getMachineOpcode()); + return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; + } + if (G) { + const TargetLowering &TLI = G->getTargetLoweringInfo(); + const char *Name = TLI.getTargetNodeName(getOpcode()); + if (Name) return Name; + return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>"; + } + return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; + +#ifndef NDEBUG + case ISD::DELETED_NODE: return "<<Deleted Node!>>"; +#endif + case ISD::PREFETCH: return "Prefetch"; + case ISD::ATOMIC_FENCE: return "AtomicFence"; + case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess"; + case ISD::ATOMIC_SWAP: return "AtomicSwap"; + case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; + case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; + case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_CLR: return "AtomicLoadClr"; + case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; + case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; + case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; + case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; + case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; + case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; + case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; + case ISD::ATOMIC_LOAD_FADD: return "AtomicLoadFAdd"; + case ISD::ATOMIC_LOAD: return "AtomicLoad"; + case ISD::ATOMIC_STORE: return "AtomicStore"; + case ISD::PCMARKER: return "PCMarker"; + case ISD::READCYCLECOUNTER: return 
"ReadCycleCounter"; + case ISD::SRCVALUE: return "SrcValue"; + case ISD::MDNODE_SDNODE: return "MDNode"; + case ISD::EntryToken: return "EntryToken"; + case ISD::TokenFactor: return "TokenFactor"; + case ISD::AssertSext: return "AssertSext"; + case ISD::AssertZext: return "AssertZext"; + + case ISD::BasicBlock: return "BasicBlock"; + case ISD::VALUETYPE: return "ValueType"; + case ISD::Register: return "Register"; + case ISD::RegisterMask: return "RegisterMask"; + case ISD::Constant: + if (cast<ConstantSDNode>(this)->isOpaque()) + return "OpaqueConstant"; + return "Constant"; + case ISD::ConstantFP: return "ConstantFP"; + case ISD::GlobalAddress: return "GlobalAddress"; + case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; + case ISD::FrameIndex: return "FrameIndex"; + case ISD::JumpTable: return "JumpTable"; + case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; + case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::ADDROFRETURNADDR: return "ADDROFRETURNADDR"; + case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::SPONENTRY: return "SPONENTRY"; + case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER"; + case ISD::READ_REGISTER: return "READ_REGISTER"; + case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; + case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EH_DWARF_CFA: return "EH_DWARF_CFA"; + case ISD::EH_RETURN: return "EH_RETURN"; + case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; + case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::EH_SJLJ_SETUP_DISPATCH: return "EH_SJLJ_SETUP_DISPATCH"; + case ISD::ConstantPool: return "ConstantPool"; + case ISD::TargetIndex: return "TargetIndex"; + case ISD::ExternalSymbol: return "ExternalSymbol"; + case ISD::BlockAddress: return "BlockAddress"; + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 
0 : 1; + unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); + if (IID < Intrinsic::num_intrinsics) + return Intrinsic::getName((Intrinsic::ID)IID, None); + else if (!G) + return "Unknown intrinsic"; + else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) + return TII->getName(IID); + llvm_unreachable("Invalid intrinsic ID"); + } + + case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; + case ISD::TargetConstant: + if (cast<ConstantSDNode>(this)->isOpaque()) + return "OpaqueTargetConstant"; + return "TargetConstant"; + case ISD::TargetConstantFP: return "TargetConstantFP"; + case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; + case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; + case ISD::TargetFrameIndex: return "TargetFrameIndex"; + case ISD::TargetJumpTable: return "TargetJumpTable"; + case ISD::TargetConstantPool: return "TargetConstantPool"; + case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; + case ISD::MCSymbol: return "MCSymbol"; + case ISD::TargetBlockAddress: return "TargetBlockAddress"; + + case ISD::CopyToReg: return "CopyToReg"; + case ISD::CopyFromReg: return "CopyFromReg"; + case ISD::UNDEF: return "undef"; + case ISD::MERGE_VALUES: return "merge_values"; + case ISD::INLINEASM: return "inlineasm"; + case ISD::INLINEASM_BR: return "inlineasm_br"; + case ISD::EH_LABEL: return "eh_label"; + case ISD::ANNOTATION_LABEL: return "annotation_label"; + case ISD::HANDLENODE: return "handlenode"; + + // Unary operators + case ISD::FABS: return "fabs"; + case ISD::FMINNUM: return "fminnum"; + case ISD::STRICT_FMINNUM: return "strict_fminnum"; + case ISD::FMAXNUM: return "fmaxnum"; + case ISD::STRICT_FMAXNUM: return "strict_fmaxnum"; + case ISD::FMINNUM_IEEE: return "fminnum_ieee"; + case ISD::FMAXNUM_IEEE: return "fmaxnum_ieee"; + case ISD::FMINIMUM: return "fminimum"; + case ISD::FMAXIMUM: return "fmaximum"; + case ISD::FNEG: return "fneg"; + case ISD::FSQRT: return "fsqrt"; + case ISD::STRICT_FSQRT: return "strict_fsqrt"; + case ISD::FCBRT: return "fcbrt"; + case ISD::FSIN: return "fsin"; + case ISD::STRICT_FSIN: return "strict_fsin"; + case ISD::FCOS: return "fcos"; + case ISD::STRICT_FCOS: return "strict_fcos"; + case ISD::FSINCOS: return "fsincos"; + case ISD::FTRUNC: return "ftrunc"; + case ISD::STRICT_FTRUNC: return "strict_ftrunc"; + case ISD::FFLOOR: return "ffloor"; + case ISD::STRICT_FFLOOR: return "strict_ffloor"; + case ISD::FCEIL: return "fceil"; + case ISD::STRICT_FCEIL: return "strict_fceil"; + case ISD::FRINT: return "frint"; + case ISD::STRICT_FRINT: return "strict_frint"; + case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint"; + case ISD::FROUND: return "fround"; + case ISD::STRICT_FROUND: return "strict_fround"; + case ISD::FEXP: return "fexp"; + case ISD::STRICT_FEXP: return "strict_fexp"; + case ISD::FEXP2: return "fexp2"; + case ISD::STRICT_FEXP2: return "strict_fexp2"; + case ISD::FLOG: return "flog"; + case ISD::STRICT_FLOG: return "strict_flog"; + case ISD::FLOG2: return "flog2"; + case ISD::STRICT_FLOG2: return "strict_flog2"; + case ISD::FLOG10: return "flog10"; + case ISD::STRICT_FLOG10: return "strict_flog10"; + + // Binary operators + case ISD::ADD: return "add"; + case ISD::SUB: return "sub"; + case ISD::MUL: return "mul"; + case ISD::MULHU: return "mulhu"; + case ISD::MULHS: return "mulhs"; + case ISD::SDIV: return "sdiv"; + case ISD::UDIV: return "udiv"; + case ISD::SREM: return "srem"; + case ISD::UREM: return "urem"; + case 
ISD::SMUL_LOHI: return "smul_lohi"; + case ISD::UMUL_LOHI: return "umul_lohi"; + case ISD::SDIVREM: return "sdivrem"; + case ISD::UDIVREM: return "udivrem"; + case ISD::AND: return "and"; + case ISD::OR: return "or"; + case ISD::XOR: return "xor"; + case ISD::SHL: return "shl"; + case ISD::SRA: return "sra"; + case ISD::SRL: return "srl"; + case ISD::ROTL: return "rotl"; + case ISD::ROTR: return "rotr"; + case ISD::FSHL: return "fshl"; + case ISD::FSHR: return "fshr"; + case ISD::FADD: return "fadd"; + case ISD::STRICT_FADD: return "strict_fadd"; + case ISD::FSUB: return "fsub"; + case ISD::STRICT_FSUB: return "strict_fsub"; + case ISD::FMUL: return "fmul"; + case ISD::STRICT_FMUL: return "strict_fmul"; + case ISD::FDIV: return "fdiv"; + case ISD::STRICT_FDIV: return "strict_fdiv"; + case ISD::FMA: return "fma"; + case ISD::STRICT_FMA: return "strict_fma"; + case ISD::FMAD: return "fmad"; + case ISD::FREM: return "frem"; + case ISD::STRICT_FREM: return "strict_frem"; + case ISD::FCOPYSIGN: return "fcopysign"; + case ISD::FGETSIGN: return "fgetsign"; + case ISD::FCANONICALIZE: return "fcanonicalize"; + case ISD::FPOW: return "fpow"; + case ISD::STRICT_FPOW: return "strict_fpow"; + case ISD::SMIN: return "smin"; + case ISD::SMAX: return "smax"; + case ISD::UMIN: return "umin"; + case ISD::UMAX: return "umax"; + + case ISD::FPOWI: return "fpowi"; + case ISD::STRICT_FPOWI: return "strict_fpowi"; + case ISD::SETCC: return "setcc"; + case ISD::SETCCCARRY: return "setcccarry"; + case ISD::SELECT: return "select"; + case ISD::VSELECT: return "vselect"; + case ISD::SELECT_CC: return "select_cc"; + case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; + case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; + case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::INSERT_SUBVECTOR: return "insert_subvector"; + case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; + case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::SPLAT_VECTOR: return "splat_vector"; + case ISD::CARRY_FALSE: return "carry_false"; + case ISD::ADDC: return "addc"; + case ISD::ADDE: return "adde"; + case ISD::ADDCARRY: return "addcarry"; + case ISD::SADDO: return "saddo"; + case ISD::UADDO: return "uaddo"; + case ISD::SSUBO: return "ssubo"; + case ISD::USUBO: return "usubo"; + case ISD::SMULO: return "smulo"; + case ISD::UMULO: return "umulo"; + case ISD::SUBC: return "subc"; + case ISD::SUBE: return "sube"; + case ISD::SUBCARRY: return "subcarry"; + case ISD::SHL_PARTS: return "shl_parts"; + case ISD::SRA_PARTS: return "sra_parts"; + case ISD::SRL_PARTS: return "srl_parts"; + + case ISD::SADDSAT: return "saddsat"; + case ISD::UADDSAT: return "uaddsat"; + case ISD::SSUBSAT: return "ssubsat"; + case ISD::USUBSAT: return "usubsat"; + + case ISD::SMULFIX: return "smulfix"; + case ISD::SMULFIXSAT: return "smulfixsat"; + case ISD::UMULFIX: return "umulfix"; + case ISD::UMULFIXSAT: return "umulfixsat"; + + // Conversion operators. 
+ case ISD::SIGN_EXTEND: return "sign_extend"; + case ISD::ZERO_EXTEND: return "zero_extend"; + case ISD::ANY_EXTEND: return "any_extend"; + case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::ANY_EXTEND_VECTOR_INREG: return "any_extend_vector_inreg"; + case ISD::SIGN_EXTEND_VECTOR_INREG: return "sign_extend_vector_inreg"; + case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg"; + case ISD::TRUNCATE: return "truncate"; + case ISD::FP_ROUND: return "fp_round"; + case ISD::STRICT_FP_ROUND: return "strict_fp_round"; + case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::FP_EXTEND: return "fp_extend"; + case ISD::STRICT_FP_EXTEND: return "strict_fp_extend"; + + case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint"; + case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint"; + case ISD::BITCAST: return "bitcast"; + case ISD::ADDRSPACECAST: return "addrspacecast"; + case ISD::FP16_TO_FP: return "fp16_to_fp"; + case ISD::FP_TO_FP16: return "fp_to_fp16"; + case ISD::LROUND: return "lround"; + case ISD::STRICT_LROUND: return "strict_lround"; + case ISD::LLROUND: return "llround"; + case ISD::STRICT_LLROUND: return "strict_llround"; + case ISD::LRINT: return "lrint"; + case ISD::STRICT_LRINT: return "strict_lrint"; + case ISD::LLRINT: return "llrint"; + case ISD::STRICT_LLRINT: return "strict_llrint"; + + // Control flow instructions + case ISD::BR: return "br"; + case ISD::BRIND: return "brind"; + case ISD::BR_JT: return "br_jt"; + case ISD::BRCOND: return "brcond"; + case ISD::BR_CC: return "br_cc"; + case ISD::CALLSEQ_START: return "callseq_start"; + case ISD::CALLSEQ_END: return "callseq_end"; + + // EH instructions + case ISD::CATCHRET: return "catchret"; + case ISD::CLEANUPRET: return "cleanupret"; + + // Other operators + case ISD::LOAD: return "load"; + case ISD::STORE: return "store"; + case ISD::MLOAD: return "masked_load"; + case ISD::MSTORE: return "masked_store"; + case ISD::MGATHER: return "masked_gather"; + case ISD::MSCATTER: return "masked_scatter"; + case ISD::VAARG: return "vaarg"; + case ISD::VACOPY: return "vacopy"; + case ISD::VAEND: return "vaend"; + case ISD::VASTART: return "vastart"; + case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; + case ISD::EXTRACT_ELEMENT: return "extract_element"; + case ISD::BUILD_PAIR: return "build_pair"; + case ISD::STACKSAVE: return "stacksave"; + case ISD::STACKRESTORE: return "stackrestore"; + case ISD::TRAP: return "trap"; + case ISD::DEBUGTRAP: return "debugtrap"; + case ISD::LIFETIME_START: return "lifetime.start"; + case ISD::LIFETIME_END: return "lifetime.end"; + case ISD::GC_TRANSITION_START: return "gc_transition.start"; + case ISD::GC_TRANSITION_END: return "gc_transition.end"; + case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset"; + + // Bit manipulation + case ISD::ABS: return "abs"; + case ISD::BITREVERSE: return "bitreverse"; + case ISD::BSWAP: return "bswap"; + case ISD::CTPOP: return "ctpop"; + case ISD::CTTZ: return "cttz"; + case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; + case ISD::CTLZ: return "ctlz"; + case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; + + // Trampolines + case ISD::INIT_TRAMPOLINE: return "init_trampoline"; + case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; + + case ISD::CONDCODE: + switch (cast<CondCodeSDNode>(this)->get()) { + default: 
llvm_unreachable("Unknown setcc condition!"); + case ISD::SETOEQ: return "setoeq"; + case ISD::SETOGT: return "setogt"; + case ISD::SETOGE: return "setoge"; + case ISD::SETOLT: return "setolt"; + case ISD::SETOLE: return "setole"; + case ISD::SETONE: return "setone"; + + case ISD::SETO: return "seto"; + case ISD::SETUO: return "setuo"; + case ISD::SETUEQ: return "setueq"; + case ISD::SETUGT: return "setugt"; + case ISD::SETUGE: return "setuge"; + case ISD::SETULT: return "setult"; + case ISD::SETULE: return "setule"; + case ISD::SETUNE: return "setune"; + + case ISD::SETEQ: return "seteq"; + case ISD::SETGT: return "setgt"; + case ISD::SETGE: return "setge"; + case ISD::SETLT: return "setlt"; + case ISD::SETLE: return "setle"; + case ISD::SETNE: return "setne"; + + case ISD::SETTRUE: return "settrue"; + case ISD::SETTRUE2: return "settrue2"; + case ISD::SETFALSE: return "setfalse"; + case ISD::SETFALSE2: return "setfalse2"; + } + case ISD::VECREDUCE_FADD: return "vecreduce_fadd"; + case ISD::VECREDUCE_STRICT_FADD: return "vecreduce_strict_fadd"; + case ISD::VECREDUCE_FMUL: return "vecreduce_fmul"; + case ISD::VECREDUCE_STRICT_FMUL: return "vecreduce_strict_fmul"; + case ISD::VECREDUCE_ADD: return "vecreduce_add"; + case ISD::VECREDUCE_MUL: return "vecreduce_mul"; + case ISD::VECREDUCE_AND: return "vecreduce_and"; + case ISD::VECREDUCE_OR: return "vecreduce_or"; + case ISD::VECREDUCE_XOR: return "vecreduce_xor"; + case ISD::VECREDUCE_SMAX: return "vecreduce_smax"; + case ISD::VECREDUCE_SMIN: return "vecreduce_smin"; + case ISD::VECREDUCE_UMAX: return "vecreduce_umax"; + case ISD::VECREDUCE_UMIN: return "vecreduce_umin"; + case ISD::VECREDUCE_FMAX: return "vecreduce_fmax"; + case ISD::VECREDUCE_FMIN: return "vecreduce_fmin"; + } +} + +const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { + switch (AM) { + default: return ""; + case ISD::PRE_INC: return "<pre-inc>"; + case ISD::PRE_DEC: return "<pre-dec>"; + case ISD::POST_INC: return "<post-inc>"; + case ISD::POST_DEC: return "<post-dec>"; + } +} + +static Printable PrintNodeId(const SDNode &Node) { + return Printable([&Node](raw_ostream &OS) { +#ifndef NDEBUG + OS << 't' << Node.PersistentId; +#else + OS << (const void*)&Node; +#endif + }); +} + +// Print the MMO with more information from the SelectionDAG. 
+static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, + const MachineFunction *MF, const Module *M, + const MachineFrameInfo *MFI, + const TargetInstrInfo *TII, LLVMContext &Ctx) { + ModuleSlotTracker MST(M); + if (MF) + MST.incorporateFunction(MF->getFunction()); + SmallVector<StringRef, 0> SSNs; + MMO.print(OS, MST, SSNs, Ctx, MFI, TII); +} + +static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, + const SelectionDAG *G) { + if (G) { + const MachineFunction *MF = &G->getMachineFunction(); + return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(), + &MF->getFrameInfo(), G->getSubtarget().getInstrInfo(), + *G->getContext()); + } else { + LLVMContext Ctx; + return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr, + /*MFI=*/nullptr, /*TII=*/nullptr, Ctx); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); } + +LLVM_DUMP_METHOD void SDNode::dump(const SelectionDAG *G) const { + print(dbgs(), G); + dbgs() << '\n'; +} +#endif + +void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { + for (unsigned i = 0, e = getNumValues(); i != e; ++i) { + if (i) OS << ","; + if (getValueType(i) == MVT::Other) + OS << "ch"; + else + OS << getValueType(i).getEVTString(); + } +} + +void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { + if (getFlags().hasNoUnsignedWrap()) + OS << " nuw"; + + if (getFlags().hasNoSignedWrap()) + OS << " nsw"; + + if (getFlags().hasExact()) + OS << " exact"; + + if (getFlags().hasNoNaNs()) + OS << " nnan"; + + if (getFlags().hasNoInfs()) + OS << " ninf"; + + if (getFlags().hasNoSignedZeros()) + OS << " nsz"; + + if (getFlags().hasAllowReciprocal()) + OS << " arcp"; + + if (getFlags().hasAllowContract()) + OS << " contract"; + + if (getFlags().hasApproximateFuncs()) + OS << " afn"; + + if (getFlags().hasAllowReassociation()) + OS << " reassoc"; + + if (getFlags().hasVectorReduction()) + OS << " vector-reduction"; + + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { + if (!MN->memoperands_empty()) { + OS << "<"; + OS << "Mem:"; + for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), + e = MN->memoperands_end(); i != e; ++i) { + printMemOperand(OS, **i, G); + if (std::next(i) != e) + OS << " "; + } + OS << ">"; + } + } else if (const ShuffleVectorSDNode *SVN = + dyn_cast<ShuffleVectorSDNode>(this)) { + OS << "<"; + for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { + int Idx = SVN->getMaskElt(i); + if (i) OS << ","; + if (Idx < 0) + OS << "u"; + else + OS << Idx; + } + OS << ">"; + } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { + OS << '<' << CSDN->getAPIntValue() << '>'; + } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { + if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEsingle()) + OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; + else if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEdouble()) + OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; + else { + OS << "<APFloat("; + CSDN->getValueAPF().bitcastToAPInt().print(OS, false); + OS << ")>"; + } + } else if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(this)) { + int64_t offset = GADN->getOffset(); + OS << '<'; + GADN->getGlobal()->printAsOperand(OS); + OS << '>'; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = GADN->getTargetFlags()) + OS << " [TF=" << TF << 
']'; + } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { + OS << "<" << FIDN->getIndex() << ">"; + } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { + OS << "<" << JTDN->getIndex() << ">"; + if (unsigned int TF = JTDN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ + int offset = CP->getOffset(); + if (CP->isMachineConstantPoolEntry()) + OS << "<" << *CP->getMachineCPVal() << ">"; + else + OS << "<" << *CP->getConstVal() << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = CP->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) { + OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">"; + if (unsigned TF = TI->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { + OS << "<"; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + OS << LBB->getName() << " "; + OS << (const void*)BBDN->getBasicBlock() << ">"; + } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { + OS << ' ' << printReg(R->getReg(), + G ? G->getSubtarget().getRegisterInfo() : nullptr); + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(this)) { + OS << "'" << ES->getSymbol() << "'"; + if (unsigned int TF = ES->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { + if (M->getValue()) + OS << "<" << M->getValue() << ">"; + else + OS << "<null>"; + } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) { + if (MD->getMD()) + OS << "<" << MD->getMD() << ">"; + else + OS << "<null>"; + } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { + OS << ":" << N->getVT().getEVTString(); + } + else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { + OS << "<"; + + printMemOperand(OS, *LD->getMemOperand(), G); + + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << LD->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(LD->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { + OS << "<"; + printMemOperand(OS, *ST->getMemOperand(), G); + + if (ST->isTruncatingStore()) + OS << ", trunc to " << ST->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(ST->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const MaskedLoadSDNode *MLd = dyn_cast<MaskedLoadSDNode>(this)) { + OS << "<"; + + printMemOperand(OS, *MLd->getMemOperand(), G); + + bool doExt = true; + switch (MLd->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << MLd->getMemoryVT().getEVTString(); + + if (MLd->isExpandingLoad()) + OS << ", expanding"; + + OS << ">"; + } else if (const MaskedStoreSDNode *MSt = dyn_cast<MaskedStoreSDNode>(this)) { + OS << "<"; + printMemOperand(OS, *MSt->getMemOperand(), G); + + if (MSt->isTruncatingStore()) + OS << ", trunc 
to " << MSt->getMemoryVT().getEVTString(); + + if (MSt->isCompressingStore()) + OS << ", compressing"; + + OS << ">"; + } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { + OS << "<"; + printMemOperand(OS, *M->getMemOperand(), G); + OS << ">"; + } else if (const BlockAddressSDNode *BA = + dyn_cast<BlockAddressSDNode>(this)) { + int64_t offset = BA->getOffset(); + OS << "<"; + BA->getBlockAddress()->getFunction()->printAsOperand(OS, false); + OS << ", "; + BA->getBlockAddress()->getBasicBlock()->printAsOperand(OS, false); + OS << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = BA->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const AddrSpaceCastSDNode *ASC = + dyn_cast<AddrSpaceCastSDNode>(this)) { + OS << '[' + << ASC->getSrcAddressSpace() + << " -> " + << ASC->getDestAddressSpace() + << ']'; + } else if (const LifetimeSDNode *LN = dyn_cast<LifetimeSDNode>(this)) { + if (LN->hasOffset()) + OS << "<" << LN->getOffset() << " to " << LN->getOffset() + LN->getSize() << ">"; + } + + if (VerboseDAGDumping) { + if (unsigned Order = getIROrder()) + OS << " [ORD=" << Order << ']'; + + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; + if (!(isa<ConstantSDNode>(this) || (isa<ConstantFPSDNode>(this)))) + OS << " # D:" << isDivergent(); + + if (G && !G->GetDbgValues(this).empty()) { + OS << " [NoOfDbgValues=" << G->GetDbgValues(this).size() << ']'; + for (SDDbgValue *Dbg : G->GetDbgValues(this)) + if (!Dbg->isInvalidated()) + Dbg->print(OS); + } else if (getHasDebugValue()) + OS << " [NoOfDbgValues>0]"; + } +} + +LLVM_DUMP_METHOD void SDDbgValue::print(raw_ostream &OS) const { + OS << " DbgVal(Order=" << getOrder() << ')'; + if (isInvalidated()) OS << "(Invalidated)"; + if (isEmitted()) OS << "(Emitted)"; + switch (getKind()) { + case SDNODE: + if (getSDNode()) + OS << "(SDNODE=" << PrintNodeId(*getSDNode()) << ':' << getResNo() << ')'; + else + OS << "(SDNODE)"; + break; + case CONST: + OS << "(CONST)"; + break; + case FRAMEIX: + OS << "(FRAMEIX=" << getFrameIx() << ')'; + break; + case VREG: + OS << "(VREG=" << getVReg() << ')'; + break; + } + if (isIndirect()) OS << "(Indirect)"; + OS << ":\"" << Var->getName() << '"'; +#ifndef NDEBUG + if (Expr->getNumElements()) + Expr->dump(); +#endif +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void SDDbgValue::dump() const { + if (isInvalidated()) + return; + print(dbgs()); + dbgs() << "\n"; +} +#endif + +/// Return true if this node is so simple that we should just print it inline +/// if it appears as an operand. +static bool shouldPrintInline(const SDNode &Node, const SelectionDAG *G) { + // Avoid lots of cluttering when inline printing nodes with associated + // DbgValues in verbose mode. 
+ if (VerboseDAGDumping && G && !G->GetDbgValues(&Node).empty()) + return false; + if (Node.getOpcode() == ISD::EntryToken) + return false; + return Node.getNumOperands() == 0; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { + for (const SDValue &Op : N->op_values()) { + if (shouldPrintInline(*Op.getNode(), G)) + continue; + if (Op.getNode()->hasOneUse()) + DumpNodes(Op.getNode(), indent+2, G); + } + + dbgs().indent(indent); + N->dump(G); +} + +LLVM_DUMP_METHOD void SelectionDAG::dump() const { + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n"; + + for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); + I != E; ++I) { + const SDNode *N = &*I; + if (!N->hasOneUse() && N != getRoot().getNode() && + (!shouldPrintInline(*N, this) || N->use_empty())) + DumpNodes(N, 2, this); + } + + if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); + dbgs() << "\n"; + + if (VerboseDAGDumping) { + if (DbgBegin() != DbgEnd()) + dbgs() << "SDDbgValues:\n"; + for (auto *Dbg : make_range(DbgBegin(), DbgEnd())) + Dbg->dump(); + if (ByvalParmDbgBegin() != ByvalParmDbgEnd()) + dbgs() << "Byval SDDbgValues:\n"; + for (auto *Dbg : make_range(ByvalParmDbgBegin(), ByvalParmDbgEnd())) + Dbg->dump(); + } + dbgs() << "\n"; +} +#endif + +void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + OS << PrintNodeId(*this) << ": "; + print_types(OS, G); + OS << " = " << getOperationName(G); + print_details(OS, G); +} + +static bool printOperand(raw_ostream &OS, const SelectionDAG *G, + const SDValue Value) { + if (!Value.getNode()) { + OS << "<null>"; + return false; + } else if (shouldPrintInline(*Value.getNode(), G)) { + OS << Value->getOperationName(G) << ':'; + Value->print_types(OS, G); + Value->print_details(OS, G); + return true; + } else { + OS << PrintNodeId(*Value.getNode()); + if (unsigned RN = Value.getResNo()) + OS << ':' << RN; + return false; + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +using VisitedSDNodeSet = SmallPtrSet<const SDNode *, 32>; + +static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, + const SelectionDAG *G, VisitedSDNodeSet &once) { + if (!once.insert(N).second) // If we've been here before, return now. + return; + + // Dump the current SDNode, but don't end the line yet. + OS.indent(indent); + N->printr(OS, G); + + // Having printed this SDNode, walk the children: + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + if (i) OS << ","; + OS << " "; + + const SDValue Op = N->getOperand(i); + bool printedInline = printOperand(OS, G, Op); + if (printedInline) + once.insert(Op.getNode()); + } + + OS << "\n"; + + // Dump children that have grandchildren on their own line(s). + for (const SDValue &Op : N->op_values()) + DumpNodesr(OS, Op.getNode(), indent+2, G, once); +} + +LLVM_DUMP_METHOD void SDNode::dumpr() const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, nullptr, once); +} + +LLVM_DUMP_METHOD void SDNode::dumpr(const SelectionDAG *G) const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, G, once); +} +#endif + +static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, + const SelectionDAG *G, unsigned depth, + unsigned indent) { + if (depth == 0) + return; + + OS.indent(indent); + + N->print(OS, G); + + if (depth < 1) + return; + + for (const SDValue &Op : N->op_values()) { + // Don't follow chain operands. 
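+ // (Chain operands have type MVT::Other and only express the ordering of + // side effects; following them would drag unrelated subgraphs into the + // output.)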
+ if (Op.getValueType() == MVT::Other) + continue; + OS << '\n'; + printrWithDepthHelper(OS, Op.getNode(), G, depth-1, indent+2); + } +} + +void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, + unsigned depth) const { + printrWithDepthHelper(OS, this, G, depth, 0); +} + +void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { + // Don't print impossibly deep things. + printrWithDepth(OS, G, 10); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD +void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { + printrWithDepth(dbgs(), G, depth); +} + +LLVM_DUMP_METHOD void SDNode::dumprFull(const SelectionDAG *G) const { + // Don't print impossibly deep things. + dumprWithDepth(G, 10); +} +#endif + +void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { + printr(OS, G); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (i) OS << ", "; else OS << " "; + printOperand(OS, G, getOperand(i)); + } + if (DebugLoc DL = getDebugLoc()) { + OS << ", "; + DL.print(OS); + } +} diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp new file mode 100644 index 0000000000000..1f07a241a8242 --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -0,0 +1,3665 @@ +//===- SelectionDAGISel.cpp - Implement the SelectionDAGISel class --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAGISel class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "ScheduleDAGSDNodes.h" +#include "SelectionDAGBuilder.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachinePassRegistry.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/SwiftErrorValueTracking.h" +#include 
"llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" +#include "llvm/Support/MachineValueType.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <limits> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "isel" + +STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); +STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected"); +STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); +STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); +STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); +STATISTIC(NumEntryBlocks, "Number of entry blocks encountered"); +STATISTIC(NumFastIselFailLowerArguments, + "Number of entry blocks where fast isel failed to lower arguments"); + +static cl::opt<int> EnableFastISelAbort( + "fast-isel-abort", cl::Hidden, + cl::desc("Enable abort calls when \"fast\" instruction selection " + "fails to lower an instruction: 0 disable the abort, 1 will " + "abort but for args, calls and terminators, 2 will also " + "abort for argument lowering, and 3 will never fallback " + "to SelectionDAG.")); + +static cl::opt<bool> EnableFastISelFallbackReport( + "fast-isel-report-on-fallback", cl::Hidden, + cl::desc("Emit a diagnostic when \"fast\" instruction selection " + "falls back to SelectionDAG.")); + +static cl::opt<bool> +UseMBPI("use-mbpi", + cl::desc("use Machine Branch Probability Info"), + cl::init(true), cl::Hidden); + +#ifndef NDEBUG +static cl::opt<std::string> +FilterDAGBasicBlockName("filter-view-dags", cl::Hidden, + cl::desc("Only display the basic block whose name " + "matches this for all view-*-dags options")); +static cl::opt<bool> +ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the first " + "dag combine pass")); +static cl::opt<bool> +ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize types")); +static cl::opt<bool> 
+ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize")); +static cl::opt<bool> +ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the second " + "dag combine pass")); +static cl::opt<bool> +ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the post legalize types" + " dag combine pass")); +static cl::opt<bool> +ViewISelDAGs("view-isel-dags", cl::Hidden, + cl::desc("Pop up a window to show isel dags as they are selected")); +static cl::opt<bool> +ViewSchedDAGs("view-sched-dags", cl::Hidden, + cl::desc("Pop up a window to show sched dags as they are processed")); +static cl::opt<bool> +ViewSUnitDAGs("view-sunit-dags", cl::Hidden, + cl::desc("Pop up a window to show SUnit dags after they are processed")); +#else +static const bool ViewDAGCombine1 = false, + ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false, + ViewDAGCombine2 = false, + ViewDAGCombineLT = false, + ViewISelDAGs = false, ViewSchedDAGs = false, + ViewSUnitDAGs = false; +#endif + +//===---------------------------------------------------------------------===// +/// +/// RegisterScheduler class - Track the registration of instruction schedulers. +/// +//===---------------------------------------------------------------------===// +MachinePassRegistry<RegisterScheduler::FunctionPassCtor> + RegisterScheduler::Registry; + +//===---------------------------------------------------------------------===// +/// +/// ISHeuristic command line option for instruction schedulers. +/// +//===---------------------------------------------------------------------===// +static cl::opt<RegisterScheduler::FunctionPassCtor, false, + RegisterPassParser<RegisterScheduler>> +ISHeuristic("pre-RA-sched", + cl::init(&createDefaultScheduler), cl::Hidden, + cl::desc("Instruction schedulers available (before register" + " allocation):")); + +static RegisterScheduler +defaultListDAGScheduler("default", "Best scheduler for the target", + createDefaultScheduler); + +namespace llvm { + + //===--------------------------------------------------------------------===// + /// This class is used by SelectionDAGISel to temporarily override + /// the optimization level on a per-function basis. + class OptLevelChanger { + SelectionDAGISel &IS; + CodeGenOpt::Level SavedOptLevel; + bool SavedFastISel; + + public: + OptLevelChanger(SelectionDAGISel &ISel, + CodeGenOpt::Level NewOptLevel) : IS(ISel) { + SavedOptLevel = IS.OptLevel; + if (NewOptLevel == SavedOptLevel) + return; + IS.OptLevel = NewOptLevel; + IS.TM.setOptLevel(NewOptLevel); + LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function " + << IS.MF->getFunction().getName() << "\n"); + LLVM_DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O" + << NewOptLevel << "\n"); + SavedFastISel = IS.TM.Options.EnableFastISel; + if (NewOptLevel == CodeGenOpt::None) { + IS.TM.setFastISel(IS.TM.getO0WantsFastISel()); + LLVM_DEBUG( + dbgs() << "\tFastISel is " + << (IS.TM.Options.EnableFastISel ? 
"enabled" : "disabled") + << "\n"); + } + } + + ~OptLevelChanger() { + if (IS.OptLevel == SavedOptLevel) + return; + LLVM_DEBUG(dbgs() << "\nRestoring optimization level for Function " + << IS.MF->getFunction().getName() << "\n"); + LLVM_DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel << " ; After: -O" + << SavedOptLevel << "\n"); + IS.OptLevel = SavedOptLevel; + IS.TM.setOptLevel(SavedOptLevel); + IS.TM.setFastISel(SavedFastISel); + } + }; + + //===--------------------------------------------------------------------===// + /// createDefaultScheduler - This creates an instruction scheduler appropriate + /// for the target. + ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetLowering *TLI = IS->TLI; + const TargetSubtargetInfo &ST = IS->MF->getSubtarget(); + + // Try first to see if the Target has its own way of selecting a scheduler + if (auto *SchedulerCtor = ST.getDAGScheduler(OptLevel)) { + return SchedulerCtor(IS, OptLevel); + } + + if (OptLevel == CodeGenOpt::None || + (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) || + TLI->getSchedulingPreference() == Sched::Source) + return createSourceListDAGScheduler(IS, OptLevel); + if (TLI->getSchedulingPreference() == Sched::RegPressure) + return createBURRListDAGScheduler(IS, OptLevel); + if (TLI->getSchedulingPreference() == Sched::Hybrid) + return createHybridListDAGScheduler(IS, OptLevel); + if (TLI->getSchedulingPreference() == Sched::VLIW) + return createVLIWDAGScheduler(IS, OptLevel); + assert(TLI->getSchedulingPreference() == Sched::ILP && + "Unknown sched type!"); + return createILPListDAGScheduler(IS, OptLevel); + } + +} // end namespace llvm + +// EmitInstrWithCustomInserter - This method should be implemented by targets +// that mark instructions with the 'usesCustomInserter' flag. These +// instructions are special in various ways, which require special support to +// insert. The specified MachineInstr is created but not inserted into any +// basic blocks, and this method is called to expand it into a sequence of +// instructions, potentially also creating new basic blocks and control flow. +// When new basic blocks are inserted and the edges from MBB to its successors +// are modified, the method should insert pairs of <OldSucc, NewSucc> into the +// DenseMap. 
+MachineBasicBlock * +TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *MBB) const { +#ifndef NDEBUG + dbgs() << "If a target marks an instruction with " + "'usesCustomInserter', it must implement " + "TargetLowering::EmitInstrWithCustomInserter!"; +#endif + llvm_unreachable(nullptr); +} + +void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, + SDNode *Node) const { + assert(!MI.hasPostISelHook() && + "If a target marks an instruction with 'hasPostISelHook', " + "it must implement TargetLowering::AdjustInstrPostInstrSelection!"); +} + +//===----------------------------------------------------------------------===// +// SelectionDAGISel code +//===----------------------------------------------------------------------===// + +SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, + CodeGenOpt::Level OL) : + MachineFunctionPass(ID), TM(tm), + FuncInfo(new FunctionLoweringInfo()), + SwiftError(new SwiftErrorValueTracking()), + CurDAG(new SelectionDAG(tm, OL)), + SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, *SwiftError, OL)), + AA(), GFI(), + OptLevel(OL), + DAGSize(0) { + initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); + initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); + initializeTargetLibraryInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); + } + +SelectionDAGISel::~SelectionDAGISel() { + delete SDB; + delete CurDAG; + delete FuncInfo; + delete SwiftError; +} + +void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { + if (OptLevel != CodeGenOpt::None) + AU.addRequired<AAResultsWrapperPass>(); + AU.addRequired<GCModuleInfo>(); + AU.addRequired<StackProtector>(); + AU.addPreserved<GCModuleInfo>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + if (UseMBPI && OptLevel != CodeGenOpt::None) + AU.addRequired<BranchProbabilityInfoWrapperPass>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that +/// may trap on it. In this case we have to split the edge so that the path +/// through the predecessor block that doesn't go to the phi block doesn't +/// execute the possibly trapping instruction. If available, we pass domtree +/// and loop info to be updated when we split critical edges. This is because +/// SelectionDAGISel preserves these analyses. +/// This is required for correctness, so it must be done at -O0. +/// +static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT, + LoopInfo *LI) { + // Loop for blocks with phi nodes. + for (BasicBlock &BB : Fn) { + PHINode *PN = dyn_cast<PHINode>(BB.begin()); + if (!PN) continue; + + ReprocessBlock: + // For each block with a PHI node, check to see if any of the input values + // are potentially trapping constant expressions. Constant expressions are + // the only potentially trapping value that can occur as the argument to a + // PHI. + for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I) + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i)); + if (!CE || !CE->canTrap()) continue; + + // The only case we have to worry about is when the edge is critical. + // Since this block has a PHI Node, we assume it has multiple input + // edges: check to see if the pred has multiple successors. 
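+ // Illustrative example (assumed, not from the original comment): an + // incoming value such as the constant expression + // sdiv (i32 1, i32 ptrtoint (i32* @g to i32)) + // may trap at run time, so it must only be evaluated on the edge that + // actually leads to this PHI; that is what the split below guarantees.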
+ BasicBlock *Pred = PN->getIncomingBlock(i); + if (Pred->getTerminator()->getNumSuccessors() == 1) + continue; + + // Okay, we have to split this edge. + SplitCriticalEdge( + Pred->getTerminator(), GetSuccessorNumber(Pred, &BB), + CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges()); + goto ReprocessBlock; + } + } +} + +static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F, + MachineModuleInfo &MMI) { + // Only needed for MSVC + if (!TT.isWindowsMSVCEnvironment()) + return; + + // If it's already set, nothing to do. + if (MMI.usesMSVCFloatingPoint()) + return; + + for (const Instruction &I : instructions(F)) { + if (I.getType()->isFPOrFPVectorTy()) { + MMI.setUsesMSVCFloatingPoint(true); + return; + } + for (const auto &Op : I.operands()) { + if (Op->getType()->isFPOrFPVectorTy()) { + MMI.setUsesMSVCFloatingPoint(true); + return; + } + } + } +} + +bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { + // If we already selected that function, we do not need to run SDISel. + if (mf.getProperties().hasProperty( + MachineFunctionProperties::Property::Selected)) + return false; + // Do some sanity-checking on the command-line options. + assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && + "-fast-isel-abort > 0 requires -fast-isel"); + + const Function &Fn = mf.getFunction(); + MF = &mf; + + // Reset the target options before resetting the optimization + // level below. + // FIXME: This is a horrible hack and should be processed via + // codegen looking at the optimization level explicitly when + // it wants to look at it. + TM.resetTargetOptions(Fn); + // Reset OptLevel to None for optnone functions. + CodeGenOpt::Level NewOptLevel = OptLevel; + if (OptLevel != CodeGenOpt::None && skipFunction(Fn)) + NewOptLevel = CodeGenOpt::None; + OptLevelChanger OLC(*this, NewOptLevel); + + TII = MF->getSubtarget().getInstrInfo(); + TLI = MF->getSubtarget().getTargetLowering(); + RegInfo = &MF->getRegInfo(); + LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn); + GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; + ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn); + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); + LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; + + LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); + + SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); + + CurDAG->init(*MF, *ORE, this, LibInfo, + getAnalysisIfAvailable<LegacyDivergenceAnalysis>()); + FuncInfo->set(Fn, *MF, CurDAG); + SwiftError->setFunction(*MF); + + // Now get the optional analyses if we want to. + // This is based on the possibly changed OptLevel (after optnone is taken + // into account). That's unfortunate but OK because it just means we won't + // ask for passes that have been required anyway. + + if (UseMBPI && OptLevel != CodeGenOpt::None) + FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); + else + FuncInfo->BPI = nullptr; + + if (OptLevel != CodeGenOpt::None) + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + else + AA = nullptr; + + SDB->init(GFI, AA, LibInfo); + + MF->setHasInlineAsm(false); + + FuncInfo->SplitCSR = false; + + // We split CSR if the target supports it for the given function + // and the function has only return exits.
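+ // (CSR stands for callee-saved registers. When the target opts in, their + // save/restore is performed as copies in the entry and return blocks + // rather than in the prologue/epilogue; see initializeSplitCSR and + // insertCopiesSplitCSR below.)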
+ if (OptLevel != CodeGenOpt::None && TLI->supportSplitCSR(MF)) { + FuncInfo->SplitCSR = true; + + // Collect all the return blocks. + for (const BasicBlock &BB : Fn) { + if (!succ_empty(&BB)) + continue; + + const Instruction *Term = BB.getTerminator(); + if (isa<UnreachableInst>(Term) || isa<ReturnInst>(Term)) + continue; + + // Bail out if the exit block is not Return nor Unreachable. + FuncInfo->SplitCSR = false; + break; + } + } + + MachineBasicBlock *EntryMBB = &MF->front(); + if (FuncInfo->SplitCSR) + // This performs initialization so lowering for SplitCSR will be correct. + TLI->initializeSplitCSR(EntryMBB); + + SelectAllBasicBlocks(Fn); + if (FastISelFailed && EnableFastISelFallbackReport) { + DiagnosticInfoISelFallback DiagFallback(Fn); + Fn.getContext().diagnose(DiagFallback); + } + + // Replace forward-declared registers with the registers containing + // the desired value. + // Note: it is important that this happens **before** the call to + // EmitLiveInCopies, since implementations can skip copies of unused + // registers. If we don't apply the reg fixups before, some registers may + // appear as unused and will be skipped, resulting in bad MI. + MachineRegisterInfo &MRI = MF->getRegInfo(); + for (DenseMap<unsigned, unsigned>::iterator I = FuncInfo->RegFixups.begin(), + E = FuncInfo->RegFixups.end(); + I != E; ++I) { + unsigned From = I->first; + unsigned To = I->second; + // If To is also scheduled to be replaced, find what its ultimate + // replacement is. + while (true) { + DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To); + if (J == E) + break; + To = J->second; + } + // Make sure the new register has a sufficiently constrained register class. + if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To)) + MRI.constrainRegClass(To, MRI.getRegClass(From)); + // Replace it. + + // Replacing one register with another won't touch the kill flags. + // We need to conservatively clear the kill flags as a kill on the old + // register might dominate existing uses of the new register. + if (!MRI.use_empty(To)) + MRI.clearKillFlags(From); + MRI.replaceRegWith(From, To); + } + + // If the first basic block in the function has live ins that need to be + // copied into vregs, emit the copies into the top of the block before + // emitting the code for the block. + const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); + RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII); + + // Insert copies in the entry block and the return blocks. + if (FuncInfo->SplitCSR) { + SmallVector<MachineBasicBlock*, 4> Returns; + // Collect all the return blocks. + for (MachineBasicBlock &MBB : mf) { + if (!MBB.succ_empty()) + continue; + + MachineBasicBlock::iterator Term = MBB.getFirstTerminator(); + if (Term != MBB.end() && Term->isReturn()) { + Returns.push_back(&MBB); + continue; + } + } + TLI->insertCopiesSplitCSR(EntryMBB, Returns); + } + + DenseMap<unsigned, unsigned> LiveInMap; + if (!FuncInfo->ArgDbgValues.empty()) + for (std::pair<unsigned, unsigned> LI : RegInfo->liveins()) + if (LI.second) + LiveInMap.insert(LI); + + // Insert DBG_VALUE instructions for function arguments to the entry block. + for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { + MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; + bool hasFI = MI->getOperand(0).isFI(); + Register Reg = + hasFI ? 
TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg();
+    if (Register::isPhysicalRegister(Reg))
+      EntryMBB->insert(EntryMBB->begin(), MI);
+    else {
+      MachineInstr *Def = RegInfo->getVRegDef(Reg);
+      if (Def) {
+        MachineBasicBlock::iterator InsertPos = Def;
+        // FIXME: VR def may not be in entry block.
+        Def->getParent()->insert(std::next(InsertPos), MI);
+      } else
+        LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg "
+                          << Register::virtReg2Index(Reg) << "\n");
+    }
+
+    // If Reg is live-in then update debug info to track its copy in a vreg.
+    DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg);
+    if (LDI != LiveInMap.end()) {
+      assert(!hasFI && "There's no handling of frame pointer updating here yet "
+                       "- add if needed");
+      MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
+      MachineBasicBlock::iterator InsertPos = Def;
+      const MDNode *Variable = MI->getDebugVariable();
+      const MDNode *Expr = MI->getDebugExpression();
+      DebugLoc DL = MI->getDebugLoc();
+      bool IsIndirect = MI->isIndirectDebugValue();
+      if (IsIndirect)
+        assert(MI->getOperand(1).getImm() == 0 &&
+               "DBG_VALUE with nonzero offset");
+      assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+             "Expected inlined-at fields to agree");
+      // Def is never a terminator here, so it is ok to increment InsertPos.
+      BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE),
+              IsIndirect, LDI->second, Variable, Expr);
+
+      // If this vreg is directly copied into an exported register then
+      // that COPY instruction also needs a DBG_VALUE, if it is the only
+      // user of LDI->second.
+      MachineInstr *CopyUseMI = nullptr;
+      for (MachineRegisterInfo::use_instr_iterator
+               UI = RegInfo->use_instr_begin(LDI->second),
+               E = RegInfo->use_instr_end(); UI != E; ) {
+        MachineInstr *UseMI = &*(UI++);
+        if (UseMI->isDebugValue()) continue;
+        if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
+          CopyUseMI = UseMI; continue;
+        }
+        // Otherwise this is another use or second copy use.
+        CopyUseMI = nullptr; break;
+      }
+      if (CopyUseMI) {
+        // Use MI's debug location, which describes where Variable was
+        // declared, rather than whatever is attached to CopyUseMI.
+        MachineInstr *NewMI =
+            BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+                    CopyUseMI->getOperand(0).getReg(), Variable, Expr);
+        MachineBasicBlock::iterator Pos = CopyUseMI;
+        EntryMBB->insertAfter(Pos, NewMI);
+      }
+    }
+  }
+
+  // Determine if there are any calls in this machine function.
+  MachineFrameInfo &MFI = MF->getFrameInfo();
+  for (const auto &MBB : *MF) {
+    if (MFI.hasCalls() && MF->hasInlineAsm())
+      break;
+
+    for (const auto &MI : MBB) {
+      const MCInstrDesc &MCID = TII->get(MI.getOpcode());
+      if ((MCID.isCall() && !MCID.isReturn()) ||
+          MI.isStackAligningInlineAsm()) {
+        MFI.setHasCalls(true);
+      }
+      if (MI.isInlineAsm()) {
+        MF->setHasInlineAsm(true);
+      }
+    }
+  }
+
+  // Determine if there is a call to setjmp in the machine function.
+  MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
+
+  // Determine if floating-point is used for MSVC.
+  computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, MF->getMMI());
+
+  // Replace forward-declared registers with the registers containing
+  // the desired value.
+  for (DenseMap<unsigned, unsigned>::iterator
+       I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end();
+       I != E; ++I) {
+    unsigned From = I->first;
+    unsigned To = I->second;
+    // If To is also scheduled to be replaced, find what its ultimate
+    // replacement is.
+    while (true) {
+      DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
+      if (J == E) break;
+      To = J->second;
+    }
+    // Make sure the new register has a sufficiently constrained register class.
+    if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To))
+      MRI.constrainRegClass(To, MRI.getRegClass(From));
+    // Replace it.
+
+    // Replacing one register with another won't touch the kill flags.
+    // We need to conservatively clear the kill flags as a kill on the old
+    // register might dominate existing uses of the new register.
+    if (!MRI.use_empty(To))
+      MRI.clearKillFlags(From);
+    MRI.replaceRegWith(From, To);
+  }
+
+  TLI->finalizeLowering(*MF);
+
+  // Release function-specific state. SDB and CurDAG are already cleared
+  // at this point.
+  FuncInfo->clear();
+
+  LLVM_DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n");
+  LLVM_DEBUG(MF->print(dbgs()));
+
+  return true;
+}
+
+static void reportFastISelFailure(MachineFunction &MF,
+                                  OptimizationRemarkEmitter &ORE,
+                                  OptimizationRemarkMissed &R,
+                                  bool ShouldAbort) {
+  // Print the function name explicitly if we don't have a debug location (which
+  // makes the diagnostic less useful) or if we're going to emit a raw error.
+  if (!R.getLocation().isValid() || ShouldAbort)
+    R << (" (in function: " + MF.getName() + ")").str();
+
+  if (ShouldAbort)
+    report_fatal_error(R.getMsg());
+
+  ORE.emit(R);
+}
+
+void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
+                                        BasicBlock::const_iterator End,
+                                        bool &HadTailCall) {
+  // Allow creating illegal types during DAG building for the basic block.
+  CurDAG->NewNodesMustHaveLegalTypes = false;
+
+  // Lower the instructions. If a call is emitted as a tail call, cease emitting
+  // nodes for this block.
+  for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
+    if (!ElidedArgCopyInstrs.count(&*I))
+      SDB->visit(*I);
+  }
+
+  // Make sure the root of the DAG is up-to-date.
+  CurDAG->setRoot(SDB->getControlRoot());
+  HadTailCall = SDB->HasTailCall;
+  SDB->resolveOrClearDbgInfo();
+  SDB->clear();
+
+  // Final step, emit the lowered DAG as machine code.
+  CodeGenAndEmitDAG();
+}
+
+void SelectionDAGISel::ComputeLiveOutVRegInfo() {
+  SmallPtrSet<SDNode*, 16> VisitedNodes;
+  SmallVector<SDNode*, 128> Worklist;
+
+  Worklist.push_back(CurDAG->getRoot().getNode());
+
+  KnownBits Known;
+
+  do {
+    SDNode *N = Worklist.pop_back_val();
+
+    // If we've already seen this node, ignore it.
+    if (!VisitedNodes.insert(N).second)
+      continue;
+
+    // Otherwise, add all chain operands to the worklist.
+    for (const SDValue &Op : N->op_values())
+      if (Op.getValueType() == MVT::Other)
+        Worklist.push_back(Op.getNode());
+
+    // If this is a CopyToReg with a vreg dest, process it.
+    if (N->getOpcode() != ISD::CopyToReg)
+      continue;
+
+    unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+    if (!Register::isVirtualRegister(DestReg))
+      continue;
+
+    // Ignore non-integer values.
+    SDValue Src = N->getOperand(2);
+    EVT SrcVT = Src.getValueType();
+    if (!SrcVT.isInteger())
+      continue;
+
+    unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
+    Known = CurDAG->computeKnownBits(Src);
+    FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, Known);
+  } while (!Worklist.empty());
+}
+
+void SelectionDAGISel::CodeGenAndEmitDAG() {
+  StringRef GroupName = "sdag";
+  StringRef GroupDescription = "Instruction Selection and Scheduling";
+  std::string BlockName;
+  bool MatchFilterBB = false; (void)MatchFilterBB;
+#ifndef NDEBUG
+  TargetTransformInfo &TTI =
+      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn);
+#endif
+
+  // Pre-type legalization allows creation of any node types.
+  CurDAG->NewNodesMustHaveLegalTypes = false;
+
+#ifndef NDEBUG
+  MatchFilterBB = (FilterDAGBasicBlockName.empty() ||
+                   FilterDAGBasicBlockName ==
+                       FuncInfo->MBB->getBasicBlock()->getName());
+#endif
+#ifdef NDEBUG
+  if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
+      ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
+      ViewSUnitDAGs)
+#endif
+  {
+    BlockName =
+        (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
+  }
+  LLVM_DEBUG(dbgs() << "Initial selection DAG: "
+                    << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+                    << "'\n";
+             CurDAG->dump());
+
+  if (ViewDAGCombine1 && MatchFilterBB)
+    CurDAG->viewGraph("dag-combine1 input for " + BlockName);
+
+  // Run the DAG combiner in pre-legalize mode.
+  {
+    NamedRegionTimer T("combine1", "DAG Combining 1", GroupName,
+                       GroupDescription, TimePassesIsEnabled);
+    CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
+  }
+
+#ifndef NDEBUG
+  if (TTI.hasBranchDivergence())
+    CurDAG->VerifyDAGDiverence();
+#endif
+
+  LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: "
+                    << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+                    << "'\n";
+             CurDAG->dump());
+
+  // Second step, hack on the DAG until it only uses operations and types that
+  // the target supports.
+  if (ViewLegalizeTypesDAGs && MatchFilterBB)
+    CurDAG->viewGraph("legalize-types input for " + BlockName);
+
+  bool Changed;
+  {
+    NamedRegionTimer T("legalize_types", "Type Legalization", GroupName,
+                       GroupDescription, TimePassesIsEnabled);
+    Changed = CurDAG->LegalizeTypes();
+  }
+
+#ifndef NDEBUG
+  if (TTI.hasBranchDivergence())
+    CurDAG->VerifyDAGDiverence();
+#endif
+
+  LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: "
+                    << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+                    << "'\n";
+             CurDAG->dump());
+
+  // Only allow creation of legal node types.
+  CurDAG->NewNodesMustHaveLegalTypes = true;
+
+  if (Changed) {
+    if (ViewDAGCombineLT && MatchFilterBB)
+      CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
+
+    // Run the DAG combiner in post-type-legalize mode.
+ { + NamedRegionTimer T("combine_lt", "DAG Combining after legalize types", + GroupName, GroupDescription, TimePassesIsEnabled); + CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel); + } + +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + + LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); + } + + { + NamedRegionTimer T("legalize_vec", "Vector Legalization", GroupName, + GroupDescription, TimePassesIsEnabled); + Changed = CurDAG->LegalizeVectors(); + } + + if (Changed) { + LLVM_DEBUG(dbgs() << "Vector-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); + + { + NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName, + GroupDescription, TimePassesIsEnabled); + CurDAG->LegalizeTypes(); + } + + LLVM_DEBUG(dbgs() << "Vector/type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); + + if (ViewDAGCombineLT && MatchFilterBB) + CurDAG->viewGraph("dag-combine-lv input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. + { + NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors", + GroupName, GroupDescription, TimePassesIsEnabled); + CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel); + } + + LLVM_DEBUG(dbgs() << "Optimized vector-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); + +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + } + + if (ViewLegalizeDAGs && MatchFilterBB) + CurDAG->viewGraph("legalize input for " + BlockName); + + { + NamedRegionTimer T("legalize", "DAG Legalization", GroupName, + GroupDescription, TimePassesIsEnabled); + CurDAG->Legalize(); + } + +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + + LLVM_DEBUG(dbgs() << "Legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); + + if (ViewDAGCombine2 && MatchFilterBB) + CurDAG->viewGraph("dag-combine2 input for " + BlockName); + + // Run the DAG combiner in post-legalize mode. + { + NamedRegionTimer T("combine2", "DAG Combining 2", GroupName, + GroupDescription, TimePassesIsEnabled); + CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel); + } + +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + + LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); + + if (OptLevel != CodeGenOpt::None) + ComputeLiveOutVRegInfo(); + + if (ViewISelDAGs && MatchFilterBB) + CurDAG->viewGraph("isel input for " + BlockName); + + // Third, instruction select all of the operations to machine code, adding the + // code to the MachineBasicBlock. + { + NamedRegionTimer T("isel", "Instruction Selection", GroupName, + GroupDescription, TimePassesIsEnabled); + DoInstructionSelection(); + } + + LLVM_DEBUG(dbgs() << "Selected selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); + + if (ViewSchedDAGs && MatchFilterBB) + CurDAG->viewGraph("scheduler input for " + BlockName); + + // Schedule machine code. 
+  ScheduleDAGSDNodes *Scheduler = CreateScheduler();
+  {
+    NamedRegionTimer T("sched", "Instruction Scheduling", GroupName,
+                       GroupDescription, TimePassesIsEnabled);
+    Scheduler->Run(CurDAG, FuncInfo->MBB);
+  }
+
+  if (ViewSUnitDAGs && MatchFilterBB)
+    Scheduler->viewGraph();
+
+  // Emit machine code to BB. This can change 'BB' to the last block being
+  // inserted into.
+  MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
+  {
+    NamedRegionTimer T("emit", "Instruction Creation", GroupName,
+                       GroupDescription, TimePassesIsEnabled);
+
+    // FuncInfo->InsertPt is passed by reference and set to the end of the
+    // scheduled instructions.
+    LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt);
+  }
+
+  // If the block was split, make sure we update any references that are used to
+  // update PHI nodes later on.
+  if (FirstMBB != LastMBB)
+    SDB->UpdateSplitBlock(FirstMBB, LastMBB);
+
+  // Free the scheduler state.
+  {
+    NamedRegionTimer T("cleanup", "Instruction Scheduling Cleanup", GroupName,
+                       GroupDescription, TimePassesIsEnabled);
+    delete Scheduler;
+  }
+
+  // Free the SelectionDAG state, now that we're finished with it.
+  CurDAG->clear();
+}
+
+namespace {
+
+/// ISelUpdater - helper class to handle updates of the instruction selection
+/// graph.
+class ISelUpdater : public SelectionDAG::DAGUpdateListener {
+  SelectionDAG::allnodes_iterator &ISelPosition;
+
+public:
+  ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp)
+      : SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {}
+
+  /// NodeDeleted - Handle nodes deleted from the graph. If the node being
+  /// deleted is the current ISelPosition node, update ISelPosition.
+  ///
+  void NodeDeleted(SDNode *N, SDNode *E) override {
+    if (ISelPosition == SelectionDAG::allnodes_iterator(N))
+      ++ISelPosition;
+  }
+};
+
+} // end anonymous namespace
+
+// This function is used to enforce the topological node id property
+// leveraged during instruction selection. Before selection, all nodes are
+// given a non-negative id such that all nodes have a larger id than their
+// operands. As this holds transitively, we can prune checks that a node N
+// is a predecessor of another node M by not recursively checking through M's
+// operands if N's ID is larger than M's ID. This significantly improves the
+// performance of various legality checks (e.g. IsLegalToFold /
+// UpdateChains).
+
+// However, when we fuse multiple nodes into a single node
+// during selection we may induce a predecessor relationship between inputs and
+// outputs of distinct nodes being merged, violating the topological property.
+// Should a fused node have a successor which has yet to be selected, our
+// legality checks would be incorrect. To avoid this we mark all unselected
+// successor nodes, i.e. id != -1, as invalid for pruning by bit-negating
+// (x => -(x+1)) the ids and modify our pruning check to ignore negative Ids
+// of M. We use bit-negation to more clearly enforce that node id -1 can only
+// be achieved by selected nodes. As the conversion is reversible to the
+// original Id, topological pruning can still be leveraged when looking for
+// unselected nodes. This method is called internally by all ISel replacement
+// calls.
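+//
+// A small worked example of the encoding (purely illustrative numbers): a
+// node with id 7 is invalidated to -(7+1) = -8; getUninvalidatedNodeId then
+// recovers -(-8+1) = 7, so the original topological order remains available,
+// while selected nodes alone carry the sentinel id -1.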
+void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
+  SmallVector<SDNode *, 4> Nodes;
+  Nodes.push_back(Node);
+
+  while (!Nodes.empty()) {
+    SDNode *N = Nodes.pop_back_val();
+    for (auto *U : N->uses()) {
+      auto UId = U->getNodeId();
+      if (UId > 0) {
+        InvalidateNodeId(U);
+        Nodes.push_back(U);
+      }
+    }
+  }
+}
+
+// InvalidateNodeId - As discussed in EnforceNodeIdInvariant, mark a
+// NodeId with the equivalent node id which is invalid for topological
+// pruning.
+void SelectionDAGISel::InvalidateNodeId(SDNode *N) {
+  int InvalidId = -(N->getNodeId() + 1);
+  N->setNodeId(InvalidId);
+}
+
+// getUninvalidatedNodeId - get original uninvalidated node id.
+int SelectionDAGISel::getUninvalidatedNodeId(SDNode *N) {
+  int Id = N->getNodeId();
+  if (Id < -1)
+    return -(Id + 1);
+  return Id;
+}
+
+void SelectionDAGISel::DoInstructionSelection() {
+  LLVM_DEBUG(dbgs() << "===== Instruction selection begins: "
+                    << printMBBReference(*FuncInfo->MBB) << " '"
+                    << FuncInfo->MBB->getName() << "'\n");
+
+  PreprocessISelDAG();
+
+  // Select target instructions for the DAG.
+  {
+    // Number all nodes with a topological order and set DAGSize.
+    DAGSize = CurDAG->AssignTopologicalOrder();
+
+    // Create a dummy node (which is not added to allnodes), that adds
+    // a reference to the root node, preventing it from being deleted,
+    // and tracking any changes of the root.
+    HandleSDNode Dummy(CurDAG->getRoot());
+    SelectionDAG::allnodes_iterator ISelPosition(CurDAG->getRoot().getNode());
+    ++ISelPosition;
+
+    // Make sure that ISelPosition gets properly updated when nodes are deleted
+    // in calls made from this function.
+    ISelUpdater ISU(*CurDAG, ISelPosition);
+
+    // The AllNodes list is now topological-sorted. Visit the
+    // nodes by starting at the end of the list (the root of the
+    // graph) and proceeding back toward the beginning (the entry
+    // node).
+    while (ISelPosition != CurDAG->allnodes_begin()) {
+      SDNode *Node = &*--ISelPosition;
+      // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
+      // but there are currently some corner cases that it misses. Also, this
+      // makes it theoretically possible to disable the DAGCombiner.
+      if (Node->use_empty())
+        continue;
+
+#ifndef NDEBUG
+      SmallVector<SDNode *, 4> Nodes;
+      Nodes.push_back(Node);
+
+      while (!Nodes.empty()) {
+        auto N = Nodes.pop_back_val();
+        if (N->getOpcode() == ISD::TokenFactor || N->getNodeId() < 0)
+          continue;
+        for (const SDValue &Op : N->op_values()) {
+          if (Op->getOpcode() == ISD::TokenFactor)
+            Nodes.push_back(Op.getNode());
+          else {
+            // We rely on topological ordering of node ids for checking for
+            // cycles when fusing nodes during selection. All unselected
+            // successors of an already selected node should have a negative
+            // id. This assertion will catch such cases. If this assertion
+            // triggers it is likely that you are using DAG-level Value/Node
+            // replacement functions (versus equivalent ISEL replacement) in
+            // backend-specific selections. See comment in
+            // EnforceNodeIdInvariant for more details.
+            assert(Op->getNodeId() != -1 &&
+                   "Node has already selected predecessor node");
+          }
+        }
+      }
+#endif
+
+      // When we are using non-default rounding modes or FP exception behavior
+      // FP operations are represented by StrictFP pseudo-operations. For
+      // targets that do not (yet) understand strict FP operations directly,
+      // we convert them to normal FP opcodes instead at this point. This
+      // will allow them to be handled by existing target-specific instruction
+      // selectors.
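+      // (Illustrative: on such a target an ISD::STRICT_FADD node would be
+      // mutated here into a plain ISD::FADD so the existing patterns match.)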
+ if (Node->isStrictFPOpcode() && + (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0)) + != TargetLowering::Legal)) + Node = CurDAG->mutateStrictFPToFP(Node); + + LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: "; + Node->dump(CurDAG)); + + Select(Node); + } + + CurDAG->setRoot(Dummy.getValue()); + } + + LLVM_DEBUG(dbgs() << "\n===== Instruction selection ends:\n"); + + PostprocessISelDAG(); +} + +static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) { + for (const User *U : CPI->users()) { + if (const IntrinsicInst *EHPtrCall = dyn_cast<IntrinsicInst>(U)) { + Intrinsic::ID IID = EHPtrCall->getIntrinsicID(); + if (IID == Intrinsic::eh_exceptionpointer || + IID == Intrinsic::eh_exceptioncode) + return true; + } + } + return false; +} + +// wasm.landingpad.index intrinsic is for associating a landing pad index number +// with a catchpad instruction. Retrieve the landing pad index in the intrinsic +// and store the mapping in the function. +static void mapWasmLandingPadIndex(MachineBasicBlock *MBB, + const CatchPadInst *CPI) { + MachineFunction *MF = MBB->getParent(); + // In case of single catch (...), we don't emit LSDA, so we don't need + // this information. + bool IsSingleCatchAllClause = + CPI->getNumArgOperands() == 1 && + cast<Constant>(CPI->getArgOperand(0))->isNullValue(); + if (!IsSingleCatchAllClause) { + // Create a mapping from landing pad label to landing pad index. + bool IntrFound = false; + for (const User *U : CPI->users()) { + if (const auto *Call = dyn_cast<IntrinsicInst>(U)) { + Intrinsic::ID IID = Call->getIntrinsicID(); + if (IID == Intrinsic::wasm_landingpad_index) { + Value *IndexArg = Call->getArgOperand(1); + int Index = cast<ConstantInt>(IndexArg)->getZExtValue(); + MF->setWasmLandingPadIndex(MBB, Index); + IntrFound = true; + break; + } + } + } + assert(IntrFound && "wasm.landingpad.index intrinsic not found!"); + (void)IntrFound; + } +} + +/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and +/// do other setup for EH landing-pad blocks. +bool SelectionDAGISel::PrepareEHLandingPad() { + MachineBasicBlock *MBB = FuncInfo->MBB; + const Constant *PersonalityFn = FuncInfo->Fn->getPersonalityFn(); + const BasicBlock *LLVMBB = MBB->getBasicBlock(); + const TargetRegisterClass *PtrRC = + TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout())); + + auto Pers = classifyEHPersonality(PersonalityFn); + + // Catchpads have one live-in register, which typically holds the exception + // pointer or code. + if (isFuncletEHPersonality(Pers)) { + if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) { + if (hasExceptionPointerOrCodeUser(CPI)) { + // Get or create the virtual register to hold the pointer or code. Mark + // the live in physreg and copy into the vreg. + MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn); + assert(EHPhysReg && "target lacks exception pointer register"); + MBB->addLiveIn(EHPhysReg); + unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC); + BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), + TII->get(TargetOpcode::COPY), VReg) + .addReg(EHPhysReg, RegState::Kill); + } + } + return true; + } + + // Add a label to mark the beginning of the landing pad. Deletion of the + // landing pad can thus be detected via the MachineModuleInfo. 
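+  // (Concretely, the BuildMI call below attaches the MCSymbol returned by
+  // addLandingPad to an EH_LABEL instruction at the current insert point.)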
+ MCSymbol *Label = MF->addLandingPad(MBB); + + const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL); + BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) + .addSym(Label); + + if (Pers == EHPersonality::Wasm_CXX) { + if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) + mapWasmLandingPadIndex(MBB, CPI); + } else { + // Assign the call site to the landing pad's begin label. + MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); + // Mark exception register as live in. + if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn)) + FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); + // Mark exception selector register as live in. + if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn)) + FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); + } + + return true; +} + +/// isFoldedOrDeadInstruction - Return true if the specified instruction is +/// side-effect free and is either dead or folded into a generated instruction. +/// Return false if it needs to be emitted. +static bool isFoldedOrDeadInstruction(const Instruction *I, + FunctionLoweringInfo *FuncInfo) { + return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. + !I->isTerminator() && // Terminators aren't folded. + !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. + !I->isEHPad() && // EH pad instructions aren't folded. + !FuncInfo->isExportedInst(I); // Exported instrs must be computed. +} + +/// Collect llvm.dbg.declare information. This is done after argument lowering +/// in case the declarations refer to arguments. +static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { + MachineFunction *MF = FuncInfo->MF; + const DataLayout &DL = MF->getDataLayout(); + for (const BasicBlock &BB : *FuncInfo->Fn) { + for (const Instruction &I : BB) { + const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I); + if (!DI) + continue; + + assert(DI->getVariable() && "Missing variable"); + assert(DI->getDebugLoc() && "Missing location"); + const Value *Address = DI->getAddress(); + if (!Address) + continue; + + // Look through casts and constant offset GEPs. These mostly come from + // inalloca. + APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0); + Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); + + // Check if the variable is a static alloca or a byval or inalloca + // argument passed in memory. If it is not, then we will ignore this + // intrinsic and handle this during isel like dbg.value. + int FI = std::numeric_limits<int>::max(); + if (const auto *AI = dyn_cast<AllocaInst>(Address)) { + auto SI = FuncInfo->StaticAllocaMap.find(AI); + if (SI != FuncInfo->StaticAllocaMap.end()) + FI = SI->second; + } else if (const auto *Arg = dyn_cast<Argument>(Address)) + FI = FuncInfo->getArgumentFrameIndex(Arg); + + if (FI == std::numeric_limits<int>::max()) + continue; + + DIExpression *Expr = DI->getExpression(); + if (Offset.getBoolValue()) + Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, + Offset.getZExtValue()); + MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc()); + } + } +} + +void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { + FastISelFailed = false; + // Initialize the Fast-ISel state, if needed. 
+ FastISel *FastIS = nullptr; + if (TM.Options.EnableFastISel) { + LLVM_DEBUG(dbgs() << "Enabling fast-isel\n"); + FastIS = TLI->createFastISel(*FuncInfo, LibInfo); + } + + ReversePostOrderTraversal<const Function*> RPOT(&Fn); + + // Lower arguments up front. An RPO iteration always visits the entry block + // first. + assert(*RPOT.begin() == &Fn.getEntryBlock()); + ++NumEntryBlocks; + + // Set up FuncInfo for ISel. Entry blocks never have PHIs. + FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()]; + FuncInfo->InsertPt = FuncInfo->MBB->begin(); + + CurDAG->setFunctionLoweringInfo(FuncInfo); + + if (!FastIS) { + LowerArguments(Fn); + } else { + // See if fast isel can lower the arguments. + FastIS->startNewBlock(); + if (!FastIS->lowerArguments()) { + FastISelFailed = true; + // Fast isel failed to lower these arguments + ++NumFastIselFailLowerArguments; + + OptimizationRemarkMissed R("sdagisel", "FastISelFailure", + Fn.getSubprogram(), + &Fn.getEntryBlock()); + R << "FastISel didn't lower all arguments: " + << ore::NV("Prototype", Fn.getType()); + reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 1); + + // Use SelectionDAG argument lowering + LowerArguments(Fn); + CurDAG->setRoot(SDB->getControlRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + // If we inserted any instructions at the beginning, make a note of + // where they are, so we can be sure to emit subsequent instructions + // after them. + if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) + FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt)); + else + FastIS->setLastLocalValue(nullptr); + } + + bool Inserted = SwiftError->createEntriesInEntryBlock(SDB->getCurDebugLoc()); + + if (FastIS && Inserted) + FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt)); + + processDbgDeclares(FuncInfo); + + // Iterate over all basic blocks in the function. + StackProtector &SP = getAnalysis<StackProtector>(); + for (const BasicBlock *LLVMBB : RPOT) { + if (OptLevel != CodeGenOpt::None) { + bool AllPredsVisited = true; + for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); + PI != PE; ++PI) { + if (!FuncInfo->VisitedBBs.count(*PI)) { + AllPredsVisited = false; + break; + } + } + + if (AllPredsVisited) { + for (const PHINode &PN : LLVMBB->phis()) + FuncInfo->ComputePHILiveOutRegInfo(&PN); + } else { + for (const PHINode &PN : LLVMBB->phis()) + FuncInfo->InvalidatePHILiveOutRegInfo(&PN); + } + + FuncInfo->VisitedBBs.insert(LLVMBB); + } + + BasicBlock::const_iterator const Begin = + LLVMBB->getFirstNonPHI()->getIterator(); + BasicBlock::const_iterator const End = LLVMBB->end(); + BasicBlock::const_iterator BI = End; + + FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; + if (!FuncInfo->MBB) + continue; // Some blocks like catchpads have no code or MBB. + + // Insert new instructions after any phi or argument setup code. + FuncInfo->InsertPt = FuncInfo->MBB->end(); + + // Setup an EH landing-pad block. + FuncInfo->ExceptionPointerVirtReg = 0; + FuncInfo->ExceptionSelectorVirtReg = 0; + if (LLVMBB->isEHPad()) + if (!PrepareEHLandingPad()) + continue; + + // Before doing SelectionDAG ISel, see if FastISel has been requested. + if (FastIS) { + if (LLVMBB != &Fn.getEntryBlock()) + FastIS->startNewBlock(); + + unsigned NumFastIselRemaining = std::distance(Begin, End); + + // Pre-assign swifterror vregs. + SwiftError->preassignVRegs(FuncInfo->MBB, Begin, End); + + // Do FastISel on as many instructions as possible. 
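+      // (FastISel selects bottom-up: the loop below walks BI backward from
+      // End toward Begin, handling one instruction at a time.)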
+ for (; BI != Begin; --BI) { + const Instruction *Inst = &*std::prev(BI); + + // If we no longer require this instruction, skip it. + if (isFoldedOrDeadInstruction(Inst, FuncInfo) || + ElidedArgCopyInstrs.count(Inst)) { + --NumFastIselRemaining; + continue; + } + + // Bottom-up: reset the insert pos at the top, after any local-value + // instructions. + FastIS->recomputeInsertPt(); + + // Try to select the instruction with FastISel. + if (FastIS->selectInstruction(Inst)) { + --NumFastIselRemaining; + ++NumFastIselSuccess; + // If fast isel succeeded, skip over all the folded instructions, and + // then see if there is a load right before the selected instructions. + // Try to fold the load if so. + const Instruction *BeforeInst = Inst; + while (BeforeInst != &*Begin) { + BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst)); + if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) + break; + } + if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && + BeforeInst->hasOneUse() && + FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) { + // If we succeeded, don't re-select the load. + BI = std::next(BasicBlock::const_iterator(BeforeInst)); + --NumFastIselRemaining; + ++NumFastIselSuccess; + } + continue; + } + + FastISelFailed = true; + + // Then handle certain instructions as single-LLVM-Instruction blocks. + // We cannot separate out GCrelocates to their own blocks since we need + // to keep track of gc-relocates for a particular gc-statepoint. This is + // done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before + // visitGCRelocate. + if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst) && + !isGCResult(Inst)) { + OptimizationRemarkMissed R("sdagisel", "FastISelFailure", + Inst->getDebugLoc(), LLVMBB); + + R << "FastISel missed call"; + + if (R.isEnabled() || EnableFastISelAbort) { + std::string InstStrStorage; + raw_string_ostream InstStr(InstStrStorage); + InstStr << *Inst; + + R << ": " << InstStr.str(); + } + + reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 2); + + if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() && + !Inst->use_empty()) { + unsigned &R = FuncInfo->ValueMap[Inst]; + if (!R) + R = FuncInfo->CreateRegs(Inst); + } + + bool HadTailCall = false; + MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt; + SelectBasicBlock(Inst->getIterator(), BI, HadTailCall); + + // If the call was emitted as a tail call, we're done with the block. + // We also need to delete any previously emitted instructions. + if (HadTailCall) { + FastIS->removeDeadCode(SavedInsertPt, FuncInfo->MBB->end()); + --BI; + break; + } + + // Recompute NumFastIselRemaining as Selection DAG instruction + // selection may have handled the call, input args, etc. + unsigned RemainingNow = std::distance(Begin, BI); + NumFastIselFailures += NumFastIselRemaining - RemainingNow; + NumFastIselRemaining = RemainingNow; + continue; + } + + OptimizationRemarkMissed R("sdagisel", "FastISelFailure", + Inst->getDebugLoc(), LLVMBB); + + bool ShouldAbort = EnableFastISelAbort; + if (Inst->isTerminator()) { + // Use a different message for terminator misses. 
+          R << "FastISel missed terminator";
+          // Don't abort for terminator unless the level is really high
+          ShouldAbort = (EnableFastISelAbort > 2);
+        } else {
+          R << "FastISel missed";
+        }
+
+        if (R.isEnabled() || EnableFastISelAbort) {
+          std::string InstStrStorage;
+          raw_string_ostream InstStr(InstStrStorage);
+          InstStr << *Inst;
+          R << ": " << InstStr.str();
+        }
+
+        reportFastISelFailure(*MF, *ORE, R, ShouldAbort);
+
+        NumFastIselFailures += NumFastIselRemaining;
+        break;
+      }
+
+      FastIS->recomputeInsertPt();
+    }
+
+    if (SP.shouldEmitSDCheck(*LLVMBB)) {
+      bool FunctionBasedInstrumentation =
+          TLI->getSSPStackGuardCheck(*Fn.getParent());
+      SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB],
+                                   FunctionBasedInstrumentation);
+    }
+
+    if (Begin != BI)
+      ++NumDAGBlocks;
+    else
+      ++NumFastIselBlocks;
+
+    if (Begin != BI) {
+      // Run SelectionDAG instruction selection on the remainder of the block
+      // not handled by FastISel. If FastISel is not run, this is the entire
+      // block.
+      bool HadTailCall;
+      SelectBasicBlock(Begin, BI, HadTailCall);
+
+      // But if FastISel was run, we already selected some of the block.
+      // If we emitted a tail-call, we need to delete any previously emitted
+      // instruction that follows it.
+      if (HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end())
+        FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end());
+    }
+
+    if (FastIS)
+      FastIS->finishBasicBlock();
+    FinishBasicBlock();
+    FuncInfo->PHINodesToUpdate.clear();
+    ElidedArgCopyInstrs.clear();
+  }
+
+  SP.copyToMachineFrameInfo(MF->getFrameInfo());
+
+  SwiftError->propagateVRegs();
+
+  delete FastIS;
+  SDB->clearDanglingDebugInfo();
+  SDB->SPDescriptor.resetPerFunctionState();
+}
+
+/// Given that the input MI is before a partial terminator sequence TSeq,
+/// return true if MI + TSeq is also a partial terminator sequence.
+///
+/// A Terminator sequence is a sequence of MachineInstrs which at this point in
+/// lowering copy vregs into physical registers, which are then passed into
+/// terminator instructions so we can satisfy ABI constraints. A partial
+/// terminator sequence is an improper subset of a terminator sequence (i.e. it
+/// may be the whole terminator sequence).
+static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
+  // If we do not have a copy or an implicit def, we return true if and only if
+  // MI is a debug value.
+  if (!MI.isCopy() && !MI.isImplicitDef())
+    // Sometimes DBG_VALUE MIs sneak in between the copies from the vregs to the
+    // physical registers if there is debug info associated with the terminator
+    // of our mbb. We want to include said debug info in our terminator
+    // sequence, so we return true in that case.
+    return MI.isDebugValue();
+
+  // We have left the terminator sequence if we are not doing one of the
+  // following:
+  //
+  // 1. Copying a vreg into a physical register.
+  // 2. Copying a vreg into a vreg.
+  // 3. Defining a register via an implicit def.
+
+  // OPI should always be a register definition...
+  MachineInstr::const_mop_iterator OPI = MI.operands_begin();
+  if (!OPI->isReg() || !OPI->isDef())
+    return false;
+
+  // Defining any register via an implicit def is always ok.
+  if (MI.isImplicitDef())
+    return true;
+
+  // Grab the copy source...
+  MachineInstr::const_mop_iterator OPI2 = OPI;
+  ++OPI2;
+  assert(OPI2 != MI.operands_end()
+         && "Should have a copy implying we should have 2 arguments.");
+
+  // Make sure that the copy dest is not a vreg when the copy source is a
+  // physical register.
+  if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) &&
+                         Register::isPhysicalRegister(OPI2->getReg())))
+    return false;
+
+  return true;
+}
+
+/// Find the split point at which to splice the end of BB into its successor
+/// stack protector check machine basic block.
+///
+/// On many platforms, due to ABI constraints, terminators, even before register
+/// allocation, use physical registers. This creates an issue for us since
+/// physical registers at this point cannot travel across basic
+/// blocks. Luckily, SelectionDAG always moves physical registers into vregs
+/// when they enter functions and moves them through a sequence of copies back
+/// into the physical registers right before the terminator creating a
+/// ``Terminator Sequence''. This function searches for the beginning of the
+/// terminator sequence so that we can ensure that we splice off not just the
+/// terminator, but additionally the copies that move the vregs into the
+/// physical registers.
+static MachineBasicBlock::iterator
+FindSplitPointForStackProtector(MachineBasicBlock *BB) {
+  MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
+  if (SplitPoint == BB->begin())
+    return SplitPoint;
+
+  MachineBasicBlock::iterator Start = BB->begin();
+  MachineBasicBlock::iterator Previous = SplitPoint;
+  --Previous;
+
+  while (MIIsInTerminatorSequence(*Previous)) {
+    SplitPoint = Previous;
+    if (Previous == Start)
+      break;
+    --Previous;
+  }
+
+  return SplitPoint;
+}
+
+void
+SelectionDAGISel::FinishBasicBlock() {
+  LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: "
+                    << FuncInfo->PHINodesToUpdate.size() << "\n";
+             for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e;
+                  ++i) dbgs()
+             << "Node " << i << " : (" << FuncInfo->PHINodesToUpdate[i].first
+             << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
+
+  // Next, now that we know what the last MBB the LLVM BB expanded is, update
+  // PHI nodes in successors.
+  for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+    MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
+    assert(PHI->isPHI() &&
+           "This is not a machine PHI node that we are updating!");
+    if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
+      continue;
+    PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
+  }
+
+  // Handle stack protector.
+  if (SDB->SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) {
+    // The target provides a guard check function. There is no need to
+    // generate error handling code or to split current basic block.
+    MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
+
+    // Add load and check to the basicblock.
+    FuncInfo->MBB = ParentMBB;
+    FuncInfo->InsertPt =
+        FindSplitPointForStackProtector(ParentMBB);
+    SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
+    CurDAG->setRoot(SDB->getRoot());
+    SDB->clear();
+    CodeGenAndEmitDAG();
+
+    // Clear the Per-BB State.
+    SDB->SPDescriptor.resetPerBBState();
+  } else if (SDB->SPDescriptor.shouldEmitStackProtector()) {
+    MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
+    MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB();
+
+    // Find the split point to split the parent mbb. At the same time copy all
+    // physical registers used in the tail of parent mbb into virtual registers
+    // before the split point and back into physical registers after the split
+    // point.
This prevents us needing to deal with Live-ins and many other + // register allocation issues caused by us splitting the parent mbb. The + // register allocator will clean up said virtual copies later on. + MachineBasicBlock::iterator SplitPoint = + FindSplitPointForStackProtector(ParentMBB); + + // Splice the terminator of ParentMBB into SuccessMBB. + SuccessMBB->splice(SuccessMBB->end(), ParentMBB, + SplitPoint, + ParentMBB->end()); + + // Add compare/jump on neq/jump to the parent BB. + FuncInfo->MBB = ParentMBB; + FuncInfo->InsertPt = ParentMBB->end(); + SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // CodeGen Failure MBB if we have not codegened it yet. + MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB(); + if (FailureMBB->empty()) { + FuncInfo->MBB = FailureMBB; + FuncInfo->InsertPt = FailureMBB->end(); + SDB->visitSPDescriptorFailure(SDB->SPDescriptor); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + // Clear the Per-BB State. + SDB->SPDescriptor.resetPerBBState(); + } + + // Lower each BitTestBlock. + for (auto &BTB : SDB->SL->BitTestCases) { + // Lower header first, if it wasn't already lowered + if (!BTB.Emitted) { + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = BTB.Parent; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // Emit the code + SDB->visitBitTestHeader(BTB, FuncInfo->MBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + BranchProbability UnhandledProb = BTB.Prob; + for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) { + UnhandledProb -= BTB.Cases[j].ExtraProb; + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = BTB.Cases[j].ThisBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // Emit the code + + // If all cases cover a contiguous range, it is not necessary to jump to + // the default block after the last bit test fails. This is because the + // range check during bit test header creation has guaranteed that every + // case here doesn't go outside the range. In this case, there is no need + // to perform the last bit test, as it will always be true. Instead, make + // the second-to-last bit-test fall through to the target of the last bit + // test, and delete the last bit test. + + MachineBasicBlock *NextMBB; + if (BTB.ContiguousRange && j + 2 == ej) { + // Second-to-last bit-test with contiguous range: fall through to the + // target of the final bit test. + NextMBB = BTB.Cases[j + 1].TargetBB; + } else if (j + 1 == ej) { + // For the last bit test, fall through to Default. + NextMBB = BTB.Default; + } else { + // Otherwise, fall through to the next bit test. + NextMBB = BTB.Cases[j + 1].ThisBB; + } + + SDB->visitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], + FuncInfo->MBB); + + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + if (BTB.ContiguousRange && j + 2 == ej) { + // Since we're not going to use the final bit test, remove it. + BTB.Cases.pop_back(); + break; + } + } + + // Update PHI Nodes + for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); + pi != pe; ++pi) { + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first); + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->isPHI() && + "This is not a machine PHI node that we are updating!"); + // This is "default" BB. We have two jumps to it. 
From "header" BB and + // from last "case" BB, unless the latter was skipped. + if (PHIBB == BTB.Default) { + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(BTB.Parent); + if (!BTB.ContiguousRange) { + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) + .addMBB(BTB.Cases.back().ThisBB); + } + } + // One of "cases" BB. + for (unsigned j = 0, ej = BTB.Cases.size(); + j != ej; ++j) { + MachineBasicBlock* cBB = BTB.Cases[j].ThisBB; + if (cBB->isSuccessor(PHIBB)) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB); + } + } + } + SDB->SL->BitTestCases.clear(); + + // If the JumpTable record is filled in, then we need to emit a jump table. + // Updating the PHI nodes is tricky in this case, since we need to determine + // whether the PHI is a successor of the range check MBB or the jump table MBB + for (unsigned i = 0, e = SDB->SL->JTCases.size(); i != e; ++i) { + // Lower header first, if it wasn't already lowered + if (!SDB->SL->JTCases[i].first.Emitted) { + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = SDB->SL->JTCases[i].first.HeaderBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // Emit the code + SDB->visitJumpTableHeader(SDB->SL->JTCases[i].second, + SDB->SL->JTCases[i].first, FuncInfo->MBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = SDB->SL->JTCases[i].second.MBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // Emit the code + SDB->visitJumpTable(SDB->SL->JTCases[i].second); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // Update PHI Nodes + for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); + pi != pe; ++pi) { + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first); + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->isPHI() && + "This is not a machine PHI node that we are updating!"); + // "default" BB. We can go there only from header BB. + if (PHIBB == SDB->SL->JTCases[i].second.Default) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) + .addMBB(SDB->SL->JTCases[i].first.HeaderBB); + // JT BB. Just iterate over successors here + if (FuncInfo->MBB->isSuccessor(PHIBB)) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB); + } + } + SDB->SL->JTCases.clear(); + + // If we generated any switch lowering information, build and codegen any + // additional DAGs necessary. + for (unsigned i = 0, e = SDB->SL->SwitchCases.size(); i != e; ++i) { + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = SDB->SL->SwitchCases[i].ThisBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + + // Determine the unique successors. + SmallVector<MachineBasicBlock *, 2> Succs; + Succs.push_back(SDB->SL->SwitchCases[i].TrueBB); + if (SDB->SL->SwitchCases[i].TrueBB != SDB->SL->SwitchCases[i].FalseBB) + Succs.push_back(SDB->SL->SwitchCases[i].FalseBB); + + // Emit the code. Note that this could result in FuncInfo->MBB being split. + SDB->visitSwitchCase(SDB->SL->SwitchCases[i], FuncInfo->MBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // Remember the last block, now that any splitting is done, for use in + // populating PHI nodes in successors. + MachineBasicBlock *ThisBB = FuncInfo->MBB; + + // Handle any PHI nodes in successors of this chunk, as if we were coming + // from the original BB before switch expansion. 
Note that PHI nodes can
+    // occur multiple times in PHINodesToUpdate. We have to be very careful to
+    // handle them the right number of times.
+    for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+      FuncInfo->MBB = Succs[i];
+      FuncInfo->InsertPt = FuncInfo->MBB->end();
+      // FuncInfo->MBB may have been removed from the CFG if a branch was
+      // constant folded.
+      if (ThisBB->isSuccessor(FuncInfo->MBB)) {
+        for (MachineBasicBlock::iterator
+             MBBI = FuncInfo->MBB->begin(), MBBE = FuncInfo->MBB->end();
+             MBBI != MBBE && MBBI->isPHI(); ++MBBI) {
+          MachineInstrBuilder PHI(*MF, MBBI);
+          // This value for this PHI node is recorded in PHINodesToUpdate.
+          for (unsigned pn = 0; ; ++pn) {
+            assert(pn != FuncInfo->PHINodesToUpdate.size() &&
+                   "Didn't find PHI entry!");
+            if (FuncInfo->PHINodesToUpdate[pn].first == PHI) {
+              PHI.addReg(FuncInfo->PHINodesToUpdate[pn].second).addMBB(ThisBB);
+              break;
+            }
+          }
+        }
+      }
+    }
+  }
+  SDB->SL->SwitchCases.clear();
+}
+
+/// Create the scheduler. If a specific scheduler was specified
+/// via the SchedulerRegistry, use it, otherwise select the
+/// one preferred by the target.
+///
+ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
+  return ISHeuristic(this, OptLevel);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions used by the generated instruction selector.
+//===----------------------------------------------------------------------===//
+// Calls to these methods are generated by tblgen.
+
+/// CheckAndMask - The isel is trying to match something like (and X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
+bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
+                                    int64_t DesiredMaskS) const {
+  const APInt &ActualMask = RHS->getAPIntValue();
+  const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+  // If the actual mask exactly matches, success!
+  if (ActualMask == DesiredMask)
+    return true;
+
+  // If the actual AND mask is allowing unallowed bits, this doesn't match.
+  if (!ActualMask.isSubsetOf(DesiredMask))
+    return false;
+
+  // Otherwise, the DAG Combiner may have proven that the value coming in is
+  // either already zero or is not demanded. Check for known zero input bits.
+  APInt NeededMask = DesiredMask & ~ActualMask;
+  if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+    return true;
+
+  // TODO: check to see if missing bits are just not demanded.
+
+  // Otherwise, this pattern doesn't match.
+  return false;
+}
+
+/// CheckOrMask - The isel is trying to match something like (or X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+                                   int64_t DesiredMaskS) const {
+  const APInt &ActualMask = RHS->getAPIntValue();
+  const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+  // If the actual mask exactly matches, success!
+  if (ActualMask == DesiredMask)
+    return true;
+
+  // If the actual OR mask is allowing unallowed bits, this doesn't match.
+  if (!ActualMask.isSubsetOf(DesiredMask))
+    return false;
+
+  // Otherwise, the DAG Combiner may have proven that the value coming in is
+  // either already zero or is not demanded.
Check for known zero input bits. + APInt NeededMask = DesiredMask & ~ActualMask; + KnownBits Known = CurDAG->computeKnownBits(LHS); + + // If all the missing bits in the or are already known to be set, match! + if (NeededMask.isSubsetOf(Known.One)) + return true; + + // TODO: check to see if missing bits are just not demanded. + + // Otherwise, this pattern doesn't match. + return false; +} + +/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated +/// by tblgen. Others should not call it. +void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, + const SDLoc &DL) { + std::vector<SDValue> InOps; + std::swap(InOps, Ops); + + Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0 + Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1 + Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc + Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack) + + unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); + if (InOps[e-1].getValueType() == MVT::Glue) + --e; // Don't process a glue operand if it is here. + + while (i != e) { + unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); + if (!InlineAsm::isMemKind(Flags)) { + // Just skip over this operand, copying the operands verbatim. + Ops.insert(Ops.end(), InOps.begin()+i, + InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1); + i += InlineAsm::getNumOperandRegisters(Flags) + 1; + } else { + assert(InlineAsm::getNumOperandRegisters(Flags) == 1 && + "Memory operand with multiple values?"); + + unsigned TiedToOperand; + if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) { + // We need the constraint ID from the operand this is tied to. + unsigned CurOp = InlineAsm::Op_FirstOperand; + Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue(); + for (; TiedToOperand; --TiedToOperand) { + CurOp += InlineAsm::getNumOperandRegisters(Flags)+1; + Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue(); + } + } + + // Otherwise, this is a memory operand. Ask the target to select it. + std::vector<SDValue> SelOps; + unsigned ConstraintID = InlineAsm::getMemoryConstraintID(Flags); + if (SelectInlineAsmMemoryOperand(InOps[i+1], ConstraintID, SelOps)) + report_fatal_error("Could not match memory address. Inline asm" + " failure!"); + + // Add this to the output node. + unsigned NewFlags = + InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); + NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID); + Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32)); + Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); + i += 2; + } + } + + // Add the glue input back if present. + if (e != InOps.size()) + Ops.push_back(InOps.back()); +} + +/// findGlueUse - Return use of MVT::Glue value produced by the specified +/// SDNode. +/// +static SDNode *findGlueUse(SDNode *N) { + unsigned FlagResNo = N->getNumValues()-1; + for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { + SDUse &Use = I.getUse(); + if (Use.getResNo() == FlagResNo) + return Use.getUser(); + } + return nullptr; +} + +/// findNonImmUse - Return true if "Def" is a predecessor of "Root" via a path +/// beyond "ImmedUse". We may ignore chains as they are checked separately. +static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse, + bool IgnoreChains) { + SmallPtrSet<const SDNode *, 16> Visited; + SmallVector<const SDNode *, 16> WorkList; + // Only check if we have non-immediate uses of Def. 
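+  // (If ImmedUse is the only user of Def, every path from Root to Def must
+  // pass through ImmedUse, so no non-immediate use can exist.)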
+ if (ImmedUse->isOnlyUserOf(Def)) + return false; + + // We don't care about paths to Def that go through ImmedUse, so mark it + // visited and seed the worklist with its non-Def operands. + Visited.insert(ImmedUse); + for (const SDValue &Op : ImmedUse->op_values()) { + SDNode *N = Op.getNode(); + // Ignore chain deps (they are validated by + // HandleMergeInputChains) and immediate uses + if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def) + continue; + if (!Visited.insert(N).second) + continue; + WorkList.push_back(N); + } + + // Initialize worklist to operands of Root. + if (Root != ImmedUse) { + for (const SDValue &Op : Root->op_values()) { + SDNode *N = Op.getNode(); + // Ignore chains (they are validated by HandleMergeInputChains) + if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def) + continue; + if (!Visited.insert(N).second) + continue; + WorkList.push_back(N); + } + } + + return SDNode::hasPredecessorHelper(Def, Visited, WorkList, 0, true); +} + +/// IsProfitableToFold - Returns true if it's profitable to fold the specific +/// operand node N of U during instruction selection that starts at Root. +bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U, + SDNode *Root) const { + if (OptLevel == CodeGenOpt::None) return false; + return N.hasOneUse(); +} + +/// IsLegalToFold - Returns true if the specific operand node N of +/// U can be folded during instruction selection that starts at Root. +bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, + CodeGenOpt::Level OptLevel, + bool IgnoreChains) { + if (OptLevel == CodeGenOpt::None) return false; + + // If Root use can somehow reach N through a path that doesn't contain + // U then folding N would create a cycle, e.g. in the following + // diagram, Root can reach N through X. If N is folded into Root, then + // X is both a predecessor and a successor of U. + // + // [N*] // + // ^ ^ // + // / \ // + // [U*] [X]? // + // ^ ^ // + // \ / // + // \ / // + // [Root*] // + // + // * indicates nodes to be folded together. + // + // If Root produces glue, then it gets (even more) interesting. Since it + // will be "glued" together with its glue use in the scheduler, we need to + // check if it might reach N. + // + // [N*] // + // ^ ^ // + // / \ // + // [U*] [X]? // + // ^ ^ // + // \ \ // + // \ | // + // [Root*] | // + // ^ | // + // f | // + // | / // + // [Y] / // + // ^ / // + // f / // + // | / // + // [GU] // + // + // If GU (glue use) indirectly reaches N (the load), and Root folds N + // (call it Fold), then X is a predecessor of GU and a successor of + // Fold. But since Fold and GU are glued together, this will create + // a cycle in the scheduling graph. + + // If the node has glue, walk down the graph to the "lowest" node in the + // glued set. + EVT VT = Root->getValueType(Root->getNumValues()-1); + while (VT == MVT::Glue) { + SDNode *GU = findGlueUse(Root); + if (!GU) + break; + Root = GU; + VT = Root->getValueType(Root->getNumValues()-1); + + // If our query node has a glue result with a use, we've walked up to it. If + // the user (which has already been selected) has a chain or indirectly uses + // the chain, HandleMergeInputChains will not consider it. Because of + // this, we cannot ignore chains in this predicate. 
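+ // (Being conservative here only costs an extra predecessor walk; wrongly ignoring a chain-only path could let the fold introduce a scheduling cycle.)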
+ IgnoreChains = false; + } + + return !findNonImmUse(Root, N.getNode(), U, IgnoreChains); +} + +void SelectionDAGISel::Select_INLINEASM(SDNode *N, bool Branch) { + SDLoc DL(N); + + std::vector<SDValue> Ops(N->op_begin(), N->op_end()); + SelectInlineAsmMemoryOperands(Ops, DL); + + const EVT VTs[] = {MVT::Other, MVT::Glue}; + SDValue New = CurDAG->getNode(Branch ? ISD::INLINEASM_BR : ISD::INLINEASM, DL, VTs, Ops); + New->setNodeId(-1); + ReplaceUses(N, New.getNode()); + CurDAG->RemoveDeadNode(N); +} + +void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); + const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + Register Reg = + TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0), + CurDAG->getMachineFunction()); + SDValue New = CurDAG->getCopyFromReg( + Op->getOperand(0), dl, Reg, Op->getValueType(0)); + New->setNodeId(-1); + ReplaceUses(Op, New.getNode()); + CurDAG->RemoveDeadNode(Op); +} + +void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); + const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + Register Reg = TLI->getRegisterByName(RegStr->getString().data(), + Op->getOperand(2).getValueType(), + CurDAG->getMachineFunction()); + SDValue New = CurDAG->getCopyToReg( + Op->getOperand(0), dl, Reg, Op->getOperand(2)); + New->setNodeId(-1); + ReplaceUses(Op, New.getNode()); + CurDAG->RemoveDeadNode(Op); +} + +void SelectionDAGISel::Select_UNDEF(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); +} + +/// GetVBR - decode a vbr encoding whose top bit is set. +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t +GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { + assert(Val >= 128 && "Not a VBR"); + Val &= 127; // Remove first vbr bit. + + unsigned Shift = 7; + uint64_t NextBits; + do { + NextBits = MatcherTable[Idx++]; + Val |= (NextBits&127) << Shift; + Shift += 7; + } while (NextBits & 128); + + return Val; +} + +/// When a match is complete, this method updates uses of interior chain results +/// to use the new results. +void SelectionDAGISel::UpdateChains( + SDNode *NodeToMatch, SDValue InputChain, + SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) { + SmallVector<SDNode*, 4> NowDeadNodes; + + // Now that all the normal results are replaced, we replace the chain and + // glue results if present. + if (!ChainNodesMatched.empty()) { + assert(InputChain.getNode() && + "Matched input chains but didn't produce a chain"); + // Loop over all of the nodes we matched that produced a chain result. + // Replace all the chain results with the final chain we ended up with. + for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { + SDNode *ChainNode = ChainNodesMatched[i]; + // If ChainNode is null, it's because we replaced it on a previous + // iteration and we cleared it out of the map. Just skip it. + if (!ChainNode) + continue; + + assert(ChainNode->getOpcode() != ISD::DELETED_NODE && + "Deleted node left in chain"); + + // Don't replace the results of the root node if we're doing a + // MorphNodeTo. 
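+ // (MorphNodeTo rewrites the root in place, so its chain result is rewired by the morph itself; UpdateChains is invoked with isMorphNodeTo set in that case and must leave the root alone.)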
+ if (ChainNode == NodeToMatch && isMorphNodeTo) + continue; + + SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1); + if (ChainVal.getValueType() == MVT::Glue) + ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2); + assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); + SelectionDAG::DAGNodeDeletedListener NDL( + *CurDAG, [&](SDNode *N, SDNode *E) { + std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N, + static_cast<SDNode *>(nullptr)); + }); + if (ChainNode->getOpcode() != ISD::TokenFactor) + ReplaceUses(ChainVal, InputChain); + + // If the node became dead and we haven't already seen it, delete it. + if (ChainNode != NodeToMatch && ChainNode->use_empty() && + !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode)) + NowDeadNodes.push_back(ChainNode); + } + } + + if (!NowDeadNodes.empty()) + CurDAG->RemoveDeadNodes(NowDeadNodes); + + LLVM_DEBUG(dbgs() << "ISEL: Match complete!\n"); +} + +/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains +/// operation for when the pattern matched at least one node with a chain. The +/// input vector contains a list of all of the chained nodes that we match. We +/// must determine if this is a valid thing to cover (i.e. matching it won't +/// induce cycles in the DAG) and if so, creates a TokenFactor node that will +/// be used as the input chain for the generated nodes. +static SDValue +HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, + SelectionDAG *CurDAG) { + + SmallPtrSet<const SDNode *, 16> Visited; + SmallVector<const SDNode *, 8> Worklist; + SmallVector<SDValue, 3> InputChains; + unsigned int Max = 8192; + + // Quick exit on trivial merge. + if (ChainNodesMatched.size() == 1) + return ChainNodesMatched[0]->getOperand(0); + + // Add chains that aren't already added (internal). Peek through + // token factors. + std::function<void(const SDValue)> AddChains = [&](const SDValue V) { + if (V.getValueType() != MVT::Other) + return; + if (V->getOpcode() == ISD::EntryToken) + return; + if (!Visited.insert(V.getNode()).second) + return; + if (V->getOpcode() == ISD::TokenFactor) { + for (const SDValue &Op : V->op_values()) + AddChains(Op); + } else + InputChains.push_back(V); + }; + + for (auto *N : ChainNodesMatched) { + Worklist.push_back(N); + Visited.insert(N); + } + + while (!Worklist.empty()) + AddChains(Worklist.pop_back_val()->getOperand(0)); + + // Skip the search if there are no chain dependencies. + if (InputChains.size() == 0) + return CurDAG->getEntryNode(); + + // If one of these chains is a successor of the input, we must have a + // node that is both the predecessor and successor of the + // to-be-merged nodes. Fail. + Visited.clear(); + for (SDValue V : InputChains) + Worklist.push_back(V.getNode()); + + for (auto *N : ChainNodesMatched) + if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true)) + return SDValue(); + + // Return merged chain. + if (InputChains.size() == 1) + return InputChains[0]; + return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]), + MVT::Other, InputChains); +} + +/// MorphNode - Handle morphing a node in place for the selector. +SDNode *SelectionDAGISel:: +MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, + ArrayRef<SDValue> Ops, unsigned EmitNodeInfo) { + // It is possible we're using MorphNodeTo to replace a node with no + // normal results with one that has a normal result (or we could be + // adding a chain) and the input could have glue and chains as well. 
+ // In this case we need to shift the operands down. + // FIXME: This is a horrible hack and broken in obscure cases, no worse + // than the old isel though. + int OldGlueResultNo = -1, OldChainResultNo = -1; + + unsigned NTMNumResults = Node->getNumValues(); + if (Node->getValueType(NTMNumResults-1) == MVT::Glue) { + OldGlueResultNo = NTMNumResults-1; + if (NTMNumResults != 1 && + Node->getValueType(NTMNumResults-2) == MVT::Other) + OldChainResultNo = NTMNumResults-2; + } else if (Node->getValueType(NTMNumResults-1) == MVT::Other) + OldChainResultNo = NTMNumResults-1; + + // Call the underlying SelectionDAG routine to do the transmogrification. Note + // that this deletes operands of the old node that become dead. + SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops); + + // MorphNodeTo can operate in two ways: if an existing node with the + // specified operands exists, it can just return it. Otherwise, it + // updates the node in place to have the requested operands. + if (Res == Node) { + // If we updated the node in place, reset the node ID. To the isel, + // this should be just like a newly allocated machine node. + Res->setNodeId(-1); + } + + unsigned ResNumResults = Res->getNumValues(); + // Move the glue if needed. + if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 && + (unsigned)OldGlueResultNo != ResNumResults-1) + ReplaceUses(SDValue(Node, OldGlueResultNo), + SDValue(Res, ResNumResults - 1)); + + if ((EmitNodeInfo & OPFL_GlueOutput) != 0) + --ResNumResults; + + // Move the chain reference if needed. + if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 && + (unsigned)OldChainResultNo != ResNumResults-1) + ReplaceUses(SDValue(Node, OldChainResultNo), + SDValue(Res, ResNumResults - 1)); + + // Otherwise, no replacement happened because the node already exists. Replace + // Uses of the old node with the new one. + if (Res != Node) { + ReplaceNode(Node, Res); + } else { + EnforceNodeIdInvariant(Res); + } + + return Res; +} + +/// CheckSame - Implements OP_CheckSame. +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, + const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) { + // Accept if it is exactly the same as a previously recorded node. + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + return N == RecordedNodes[RecNo].first; +} + +/// CheckChildSame - Implements OP_CheckChildXSame. +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, + const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes, + unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. + return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo), + RecordedNodes); +} + +/// CheckPatternPredicate - Implements OP_CheckPatternPredicate. +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, + const SelectionDAGISel &SDISel) { + return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); +} + +/// CheckNodePredicate - Implements OP_CheckNodePredicate. 
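+/// The predicate number is the next byte of the MatcherTable; it is passed, together with the node under test, to the target-generated predicate hook.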
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, + const SelectionDAGISel &SDISel, SDNode *N) { + return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDNode *N) { + uint16_t Opc = MatcherTable[MatcherIndex++]; + Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + return N->getOpcode() == Opc; +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, + const TargetLowering *TLI, const DataLayout &DL) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (N.getValueType() == VT) return true; + + // Handle the case when VT is iPTR. + return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const TargetLowering *TLI, const DataLayout &DL, + unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. + return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI, + DL); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N) { + return cast<CondCodeSDNode>(N)->get() == + (ISD::CondCode)MatcherTable[MatcherIndex++]; +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N) { + if (2 >= N.getNumOperands()) + return false; + return ::CheckCondCode(MatcherTable, MatcherIndex, N.getOperand(2)); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const TargetLowering *TLI, const DataLayout &DL) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (cast<VTSDNode>(N)->getVT() == VT) + return true; + + // Handle the case when VT is iPTR. + return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N) { + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); + return C && C->getSExtValue() == Val; +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. 
+ return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo)); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const SelectionDAGISel &SDISel) { + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + + if (N->getOpcode() != ISD::AND) return false; + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + return C && SDISel.CheckAndMask(N.getOperand(0), C, Val); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const SelectionDAGISel &SDISel) { + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + + if (N->getOpcode() != ISD::OR) return false; + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + return C && SDISel.CheckOrMask(N.getOperand(0), C, Val); +} + +/// IsPredicateKnownToFail - If we know how and can do so without pushing a +/// scope, evaluate the current node. If the current predicate is known to +/// fail, set Result=true and return anything. If the current predicate is +/// known to pass, set Result=false and return the MatcherIndex to continue +/// with. If the current predicate is unknown, set Result=false and return the +/// MatcherIndex to continue with. +static unsigned IsPredicateKnownToFail(const unsigned char *Table, + unsigned Index, SDValue N, + bool &Result, + const SelectionDAGISel &SDISel, + SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) { + switch (Table[Index++]) { + default: + Result = false; + return Index-1; // Could not evaluate this predicate. + case SelectionDAGISel::OPC_CheckSame: + Result = !::CheckSame(Table, Index, N, RecordedNodes); + return Index; + case SelectionDAGISel::OPC_CheckChild0Same: + case SelectionDAGISel::OPC_CheckChild1Same: + case SelectionDAGISel::OPC_CheckChild2Same: + case SelectionDAGISel::OPC_CheckChild3Same: + Result = !::CheckChildSame(Table, Index, N, RecordedNodes, + Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same); + return Index; + case SelectionDAGISel::OPC_CheckPatternPredicate: + Result = !::CheckPatternPredicate(Table, Index, SDISel); + return Index; + case SelectionDAGISel::OPC_CheckPredicate: + Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode()); + return Index; + case SelectionDAGISel::OPC_CheckOpcode: + Result = !::CheckOpcode(Table, Index, N.getNode()); + return Index; + case SelectionDAGISel::OPC_CheckType: + Result = !::CheckType(Table, Index, N, SDISel.TLI, + SDISel.CurDAG->getDataLayout()); + return Index; + case SelectionDAGISel::OPC_CheckTypeRes: { + unsigned Res = Table[Index++]; + Result = !::CheckType(Table, Index, N.getValue(Res), SDISel.TLI, + SDISel.CurDAG->getDataLayout()); + return Index; + } + case SelectionDAGISel::OPC_CheckChild0Type: + case SelectionDAGISel::OPC_CheckChild1Type: + case SelectionDAGISel::OPC_CheckChild2Type: + case SelectionDAGISel::OPC_CheckChild3Type: + case SelectionDAGISel::OPC_CheckChild4Type: + case SelectionDAGISel::OPC_CheckChild5Type: + case SelectionDAGISel::OPC_CheckChild6Type: + case SelectionDAGISel::OPC_CheckChild7Type: + Result = !::CheckChildType( + Table, Index, N, SDISel.TLI, SDISel.CurDAG->getDataLayout(), + Table[Index - 1] - SelectionDAGISel::OPC_CheckChild0Type); + return Index; + case SelectionDAGISel::OPC_CheckCondCode: + Result = !::CheckCondCode(Table, Index, N); + return Index; + case 
SelectionDAGISel::OPC_CheckChild2CondCode: + Result = !::CheckChild2CondCode(Table, Index, N); + return Index; + case SelectionDAGISel::OPC_CheckValueType: + Result = !::CheckValueType(Table, Index, N, SDISel.TLI, + SDISel.CurDAG->getDataLayout()); + return Index; + case SelectionDAGISel::OPC_CheckInteger: + Result = !::CheckInteger(Table, Index, N); + return Index; + case SelectionDAGISel::OPC_CheckChild0Integer: + case SelectionDAGISel::OPC_CheckChild1Integer: + case SelectionDAGISel::OPC_CheckChild2Integer: + case SelectionDAGISel::OPC_CheckChild3Integer: + case SelectionDAGISel::OPC_CheckChild4Integer: + Result = !::CheckChildInteger(Table, Index, N, + Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Integer); + return Index; + case SelectionDAGISel::OPC_CheckAndImm: + Result = !::CheckAndImm(Table, Index, N, SDISel); + return Index; + case SelectionDAGISel::OPC_CheckOrImm: + Result = !::CheckOrImm(Table, Index, N, SDISel); + return Index; + } +} + +namespace { + +struct MatchScope { + /// FailIndex - If this match fails, this is the index to continue with. + unsigned FailIndex; + + /// NodeStack - The node stack when the scope was formed. + SmallVector<SDValue, 4> NodeStack; + + /// NumRecordedNodes - The number of recorded nodes when the scope was formed. + unsigned NumRecordedNodes; + + /// NumMatchedMemRefs - The number of matched memref entries. + unsigned NumMatchedMemRefs; + + /// InputChain/InputGlue - The current chain/glue + SDValue InputChain, InputGlue; + + /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty. + bool HasChainNodesMatched; +}; + +/// A DAG update listener to keep the matching state +/// (i.e. RecordedNodes and MatchScope) up to date if the target is allowed to +/// change the DAG while matching. The X86 addressing mode matcher is an example +/// of this. +class MatchStateUpdater : public SelectionDAG::DAGUpdateListener +{ + SDNode **NodeToMatch; + SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes; + SmallVectorImpl<MatchScope> &MatchScopes; + +public: + MatchStateUpdater(SelectionDAG &DAG, SDNode **NodeToMatch, + SmallVectorImpl<std::pair<SDValue, SDNode *>> &RN, + SmallVectorImpl<MatchScope> &MS) + : SelectionDAG::DAGUpdateListener(DAG), NodeToMatch(NodeToMatch), + RecordedNodes(RN), MatchScopes(MS) {} + + void NodeDeleted(SDNode *N, SDNode *E) override { + // Some early-returns here to avoid the search if we deleted the node or + // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we + // do, so it's unnecessary to update matching state at that point). + // Neither of these can occur currently because we only install this + // update listener while matching a complex pattern. + if (!E || E->isMachineOpcode()) + return; + // Check if NodeToMatch was updated. + if (N == *NodeToMatch) + *NodeToMatch = E; + // Performing linear search here does not matter because we almost never + // run this code. You'd have to have a CSE during complex pattern + // matching. + for (auto &I : RecordedNodes) + if (I.first.getNode() == N) + I.first.setNode(E); + + for (auto &I : MatchScopes) + for (auto &J : I.NodeStack) + if (J.getNode() == N) + J.setNode(E); + } +}; + +} // end anonymous namespace + +void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, + const unsigned char *MatcherTable, + unsigned TableSize) { + // FIXME: Should these even be selected? Handle these cases in the caller? + switch (NodeToMatch->getOpcode()) { + default: + break; + case ISD::EntryToken: // These nodes remain the same. 
+ case ISD::BasicBlock: + case ISD::Register: + case ISD::RegisterMask: + case ISD::HANDLENODE: + case ISD::MDNODE_SDNODE: + case ISD::TargetConstant: + case ISD::TargetConstantFP: + case ISD::TargetConstantPool: + case ISD::TargetFrameIndex: + case ISD::TargetExternalSymbol: + case ISD::MCSymbol: + case ISD::TargetBlockAddress: + case ISD::TargetJumpTable: + case ISD::TargetGlobalTLSAddress: + case ISD::TargetGlobalAddress: + case ISD::TokenFactor: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::EH_LABEL: + case ISD::ANNOTATION_LABEL: + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: + NodeToMatch->setNodeId(-1); // Mark selected. + return; + case ISD::AssertSext: + case ISD::AssertZext: + ReplaceUses(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0)); + CurDAG->RemoveDeadNode(NodeToMatch); + return; + case ISD::INLINEASM: + case ISD::INLINEASM_BR: + Select_INLINEASM(NodeToMatch, + NodeToMatch->getOpcode() == ISD::INLINEASM_BR); + return; + case ISD::READ_REGISTER: + Select_READ_REGISTER(NodeToMatch); + return; + case ISD::WRITE_REGISTER: + Select_WRITE_REGISTER(NodeToMatch); + return; + case ISD::UNDEF: + Select_UNDEF(NodeToMatch); + return; + } + + assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); + + // Set up the node stack with NodeToMatch as the only node on the stack. + SmallVector<SDValue, 8> NodeStack; + SDValue N = SDValue(NodeToMatch, 0); + NodeStack.push_back(N); + + // MatchScopes - Scopes used when matching, if a match failure happens, this + // indicates where to continue checking. + SmallVector<MatchScope, 8> MatchScopes; + + // RecordedNodes - This is the set of nodes that have been recorded by the + // state machine. The second value is the parent of the node, or null if the + // root is recorded. + SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes; + + // MatchedMemRefs - This is the set of MemRef's we've seen in the input + // pattern. + SmallVector<MachineMemOperand*, 2> MatchedMemRefs; + + // These are the current input chain and glue for use when generating nodes. + // Various Emit operations change these. For example, emitting a copytoreg + // uses and updates these. + SDValue InputChain, InputGlue; + + // ChainNodesMatched - If a pattern matches nodes that have input/output + // chains, the OPC_EmitMergeInputChains operation is emitted which indicates + // which ones they are. The result is captured into this list so that we can + // update the chain results when the pattern is complete. + SmallVector<SDNode*, 3> ChainNodesMatched; + + LLVM_DEBUG(dbgs() << "ISEL: Starting pattern match\n"); + + // Determine where to start the interpreter. Normally we start at opcode #0, + // but if the state machine starts with an OPC_SwitchOpcode, then we + // accelerate the first lookup (which is guaranteed to be hot) with the + // OpcodeOffset table. + unsigned MatcherIndex = 0; + + if (!OpcodeOffset.empty()) { + // Already computed the OpcodeOffset table, just index into it. + if (N.getOpcode() < OpcodeOffset.size()) + MatcherIndex = OpcodeOffset[N.getOpcode()]; + LLVM_DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n"); + + } else if (MatcherTable[0] == OPC_SwitchOpcode) { + // Otherwise, the table isn't computed, but the state machine does start + // with an OPC_SwitchOpcode instruction. Populate the table now, since this + // is the first time we're selecting an instruction. + unsigned Idx = 1; + while (true) { + // Get the size of this case. 
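+ // Case sizes use the same VBR encoding that GetVBR decodes: a byte with the high bit set contributes its low 7 bits and marks a continuation. For example, a case size of 300 would be stored as 0xAC 0x02, i.e. 44 + (2 << 7).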
+ unsigned CaseSize = MatcherTable[Idx++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, Idx); + if (CaseSize == 0) break; + + // Get the opcode, add the index to the table. + uint16_t Opc = MatcherTable[Idx++]; + Opc |= (unsigned short)MatcherTable[Idx++] << 8; + if (Opc >= OpcodeOffset.size()) + OpcodeOffset.resize((Opc+1)*2); + OpcodeOffset[Opc] = Idx; + Idx += CaseSize; + } + + // Okay, do the lookup for the first opcode. + if (N.getOpcode() < OpcodeOffset.size()) + MatcherIndex = OpcodeOffset[N.getOpcode()]; + } + + while (true) { + assert(MatcherIndex < TableSize && "Invalid index"); +#ifndef NDEBUG + unsigned CurrentOpcodeIndex = MatcherIndex; +#endif + BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++]; + switch (Opcode) { + case OPC_Scope: { + // Okay, the semantics of this operation are that we should push a scope + // then evaluate the first child. However, pushing a scope only to have + // the first check fail (which then pops it) is inefficient. If we can + // determine immediately that the first check (or first several) will + // immediately fail, don't even bother pushing a scope for them. + unsigned FailIndex; + + while (true) { + unsigned NumToSkip = MatcherTable[MatcherIndex++]; + if (NumToSkip & 128) + NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); + // Found the end of the scope with no match. + if (NumToSkip == 0) { + FailIndex = 0; + break; + } + + FailIndex = MatcherIndex+NumToSkip; + + unsigned MatcherIndexOfPredicate = MatcherIndex; + (void)MatcherIndexOfPredicate; // silence warning. + + // If we can't evaluate this predicate without pushing a scope (e.g. if + // it is a 'MoveParent') or if the predicate succeeds on this node, we + // push the scope and evaluate the full predicate chain. + bool Result; + MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N, + Result, *this, RecordedNodes); + if (!Result) + break; + + LLVM_DEBUG( + dbgs() << " Skipped scope entry (due to false predicate) at " + << "index " << MatcherIndexOfPredicate << ", continuing at " + << FailIndex << "\n"); + ++NumDAGIselRetries; + + // Otherwise, we know that this case of the Scope is guaranteed to fail, + // move to the next case. + MatcherIndex = FailIndex; + } + + // If the whole scope failed to match, bail. + if (FailIndex == 0) break; + + // Push a MatchScope which indicates where to go if the first child fails + // to match. + MatchScope NewEntry; + NewEntry.FailIndex = FailIndex; + NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end()); + NewEntry.NumRecordedNodes = RecordedNodes.size(); + NewEntry.NumMatchedMemRefs = MatchedMemRefs.size(); + NewEntry.InputChain = InputChain; + NewEntry.InputGlue = InputGlue; + NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty(); + MatchScopes.push_back(NewEntry); + continue; + } + case OPC_RecordNode: { + // Remember this node, it may end up being an operand in the pattern. + SDNode *Parent = nullptr; + if (NodeStack.size() > 1) + Parent = NodeStack[NodeStack.size()-2].getNode(); + RecordedNodes.push_back(std::make_pair(N, Parent)); + continue; + } + + case OPC_RecordChild0: case OPC_RecordChild1: + case OPC_RecordChild2: case OPC_RecordChild3: + case OPC_RecordChild4: case OPC_RecordChild5: + case OPC_RecordChild6: case OPC_RecordChild7: { + unsigned ChildNo = Opcode-OPC_RecordChild0; + if (ChildNo >= N.getNumOperands()) + break; // Match fails if out of range child #. 
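+ // Record the child along with its parent so that later matcher opcodes (OPC_CheckSame, EmitNode operand lists, etc.) can refer back to it by its index in RecordedNodes.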
+ + RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo), + N.getNode())); + continue; + } + case OPC_RecordMemRef: + if (auto *MN = dyn_cast<MemSDNode>(N)) + MatchedMemRefs.push_back(MN->getMemOperand()); + else { + LLVM_DEBUG(dbgs() << "Expected MemSDNode "; N->dump(CurDAG); + dbgs() << '\n'); + } + + continue; + + case OPC_CaptureGlueInput: + // If the current node has an input glue, capture it in InputGlue. + if (N->getNumOperands() != 0 && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) + InputGlue = N->getOperand(N->getNumOperands()-1); + continue; + + case OPC_MoveChild: { + unsigned ChildNo = MatcherTable[MatcherIndex++]; + if (ChildNo >= N.getNumOperands()) + break; // Match fails if out of range child #. + N = N.getOperand(ChildNo); + NodeStack.push_back(N); + continue; + } + + case OPC_MoveChild0: case OPC_MoveChild1: + case OPC_MoveChild2: case OPC_MoveChild3: + case OPC_MoveChild4: case OPC_MoveChild5: + case OPC_MoveChild6: case OPC_MoveChild7: { + unsigned ChildNo = Opcode-OPC_MoveChild0; + if (ChildNo >= N.getNumOperands()) + break; // Match fails if out of range child #. + N = N.getOperand(ChildNo); + NodeStack.push_back(N); + continue; + } + + case OPC_MoveParent: + // Pop the current node off the NodeStack. + NodeStack.pop_back(); + assert(!NodeStack.empty() && "Node stack imbalance!"); + N = NodeStack.back(); + continue; + + case OPC_CheckSame: + if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break; + continue; + + case OPC_CheckChild0Same: case OPC_CheckChild1Same: + case OPC_CheckChild2Same: case OPC_CheckChild3Same: + if (!::CheckChildSame(MatcherTable, MatcherIndex, N, RecordedNodes, + Opcode-OPC_CheckChild0Same)) + break; + continue; + + case OPC_CheckPatternPredicate: + if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break; + continue; + case OPC_CheckPredicate: + if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this, + N.getNode())) + break; + continue; + case OPC_CheckPredicateWithOperands: { + unsigned OpNum = MatcherTable[MatcherIndex++]; + SmallVector<SDValue, 8> Operands; + + for (unsigned i = 0; i < OpNum; ++i) + Operands.push_back(RecordedNodes[MatcherTable[MatcherIndex++]].first); + + unsigned PredNo = MatcherTable[MatcherIndex++]; + if (!CheckNodePredicateWithOperands(N.getNode(), PredNo, Operands)) + break; + continue; + } + case OPC_CheckComplexPat: { + unsigned CPNum = MatcherTable[MatcherIndex++]; + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); + + // If target can modify DAG during matching, keep the matching state + // consistent. 
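+ // (RecordedNodes and the scope stack hold raw SDValue references; if the target hook CSEs or replaces a node mid-match, the listener below patches those references so they stay valid.)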
+ std::unique_ptr<MatchStateUpdater> MSU; + if (ComplexPatternFuncMutatesDAG()) + MSU.reset(new MatchStateUpdater(*CurDAG, &NodeToMatch, RecordedNodes, + MatchScopes)); + + if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second, + RecordedNodes[RecNo].first, CPNum, + RecordedNodes)) + break; + continue; + } + case OPC_CheckOpcode: + if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break; + continue; + + case OPC_CheckType: + if (!::CheckType(MatcherTable, MatcherIndex, N, TLI, + CurDAG->getDataLayout())) + break; + continue; + + case OPC_CheckTypeRes: { + unsigned Res = MatcherTable[MatcherIndex++]; + if (!::CheckType(MatcherTable, MatcherIndex, N.getValue(Res), TLI, + CurDAG->getDataLayout())) + break; + continue; + } + + case OPC_SwitchOpcode: { + unsigned CurNodeOpcode = N.getOpcode(); + unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; + unsigned CaseSize; + while (true) { + // Get the size of this case. + CaseSize = MatcherTable[MatcherIndex++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); + if (CaseSize == 0) break; + + uint16_t Opc = MatcherTable[MatcherIndex++]; + Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + + // If the opcode matches, then we will execute this case. + if (CurNodeOpcode == Opc) + break; + + // Otherwise, skip over this case. + MatcherIndex += CaseSize; + } + + // If no cases matched, bail out. + if (CaseSize == 0) break; + + // Otherwise, execute the case we found. + LLVM_DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart << " to " + << MatcherIndex << "\n"); + continue; + } + + case OPC_SwitchType: { + MVT CurNodeVT = N.getSimpleValueType(); + unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; + unsigned CaseSize; + while (true) { + // Get the size of this case. + CaseSize = MatcherTable[MatcherIndex++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); + if (CaseSize == 0) break; + + MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (CaseVT == MVT::iPTR) + CaseVT = TLI->getPointerTy(CurDAG->getDataLayout()); + + // If the VT matches, then we will execute this case. + if (CurNodeVT == CaseVT) + break; + + // Otherwise, skip over this case. + MatcherIndex += CaseSize; + } + + // If no cases matched, bail out. + if (CaseSize == 0) break; + + // Otherwise, execute the case we found. 
+ LLVM_DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() + << "] from " << SwitchStart << " to " << MatcherIndex + << '\n'); + continue; + } + case OPC_CheckChild0Type: case OPC_CheckChild1Type: + case OPC_CheckChild2Type: case OPC_CheckChild3Type: + case OPC_CheckChild4Type: case OPC_CheckChild5Type: + case OPC_CheckChild6Type: case OPC_CheckChild7Type: + if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, + CurDAG->getDataLayout(), + Opcode - OPC_CheckChild0Type)) + break; + continue; + case OPC_CheckCondCode: + if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; + continue; + case OPC_CheckChild2CondCode: + if (!::CheckChild2CondCode(MatcherTable, MatcherIndex, N)) break; + continue; + case OPC_CheckValueType: + if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI, + CurDAG->getDataLayout())) + break; + continue; + case OPC_CheckInteger: + if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break; + continue; + case OPC_CheckChild0Integer: case OPC_CheckChild1Integer: + case OPC_CheckChild2Integer: case OPC_CheckChild3Integer: + case OPC_CheckChild4Integer: + if (!::CheckChildInteger(MatcherTable, MatcherIndex, N, + Opcode-OPC_CheckChild0Integer)) break; + continue; + case OPC_CheckAndImm: + if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break; + continue; + case OPC_CheckOrImm: + if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break; + continue; + case OPC_CheckImmAllOnesV: + if (!ISD::isBuildVectorAllOnes(N.getNode())) break; + continue; + case OPC_CheckImmAllZerosV: + if (!ISD::isBuildVectorAllZeros(N.getNode())) break; + continue; + + case OPC_CheckFoldableChainNode: { + assert(NodeStack.size() != 1 && "No parent node"); + // Verify that all intermediate nodes between the root and this one have + // a single use. + bool HasMultipleUses = false; + for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) + if (!NodeStack[i].getNode()->hasOneUse()) { + HasMultipleUses = true; + break; + } + if (HasMultipleUses) break; + + // Check to see that the target thinks this is profitable to fold and that + // we can fold it without inducing cycles in the graph. + if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(), + NodeToMatch) || + !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(), + NodeToMatch, OptLevel, + true/*We validate our own chains*/)) + break; + + continue; + } + case OPC_EmitInteger: { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch), + VT), nullptr)); + continue; + } + case OPC_EmitRegister: { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + unsigned RegNo = MatcherTable[MatcherIndex++]; + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getRegister(RegNo, VT), nullptr)); + continue; + } + case OPC_EmitRegister2: { + // For targets w/ more than 256 register names, the register enum + // values are stored in two bytes in the matcher table (just like + // opcodes). 
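+ // (Low byte first: the full register number is MatcherTable[i] | (MatcherTable[i+1] << 8), mirroring the decode below.)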
+ MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + unsigned RegNo = MatcherTable[MatcherIndex++]; + RegNo |= MatcherTable[MatcherIndex++] << 8; + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getRegister(RegNo, VT), nullptr)); + continue; + } + + case OPC_EmitConvertToTarget: { + // Convert from IMM/FPIMM to target version. + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget"); + SDValue Imm = RecordedNodes[RecNo].first; + + if (Imm->getOpcode() == ISD::Constant) { + const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue(); + Imm = CurDAG->getTargetConstant(*Val, SDLoc(NodeToMatch), + Imm.getValueType()); + } else if (Imm->getOpcode() == ISD::ConstantFP) { + const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); + Imm = CurDAG->getTargetConstantFP(*Val, SDLoc(NodeToMatch), + Imm.getValueType()); + } + + RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second)); + continue; + } + + case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0 + case OPC_EmitMergeInputChains1_1: // OPC_EmitMergeInputChains, 1, 1 + case OPC_EmitMergeInputChains1_2: { // OPC_EmitMergeInputChains, 1, 2 + // These are space-optimized forms of OPC_EmitMergeInputChains. + assert(!InputChain.getNode() && + "EmitMergeInputChains should be the first chain producing node"); + assert(ChainNodesMatched.empty() && + "Should only have one EmitMergeInputChains per match"); + + // Read all of the chained nodes. + unsigned RecNo = Opcode - OPC_EmitMergeInputChains1_0; + assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); + ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + + // FIXME: What if other value results of the node have uses not matched + // by this pattern? + if (ChainNodesMatched.back() != NodeToMatch && + !RecordedNodes[RecNo].first.hasOneUse()) { + ChainNodesMatched.clear(); + break; + } + + // Merge the input chains if they are not intra-pattern references. + InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); + + if (!InputChain.getNode()) + break; // Failed to merge. + continue; + } + + case OPC_EmitMergeInputChains: { + assert(!InputChain.getNode() && + "EmitMergeInputChains should be the first chain producing node"); + // This node gets a list of nodes we matched in the input that have + // chains. We want to token factor all of the input chains to these nodes + // together. However, if any of the input chains is actually one of the + // nodes matched in this pattern, then we have an intra-match reference. + // Ignore these because the newly token factored chain should not refer to + // the old nodes. + unsigned NumChains = MatcherTable[MatcherIndex++]; + assert(NumChains != 0 && "Can't TF zero chains"); + + assert(ChainNodesMatched.empty() && + "Should only have one EmitMergeInputChains per match"); + + // Read all of the chained nodes. + for (unsigned i = 0; i != NumChains; ++i) { + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); + ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + + // FIXME: What if other value results of the node have uses not matched + // by this pattern? + if (ChainNodesMatched.back() != NodeToMatch && + !RecordedNodes[RecNo].first.hasOneUse()) { + ChainNodesMatched.clear(); + break; + } + } + + // If the inner loop broke out, the match fails. 
+ if (ChainNodesMatched.empty()) + break; + + // Merge the input chains if they are not intra-pattern references. + InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); + + if (!InputChain.getNode()) + break; // Failed to merge. + + continue; + } + + case OPC_EmitCopyToReg: + case OPC_EmitCopyToReg2: { + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg"); + unsigned DestPhysReg = MatcherTable[MatcherIndex++]; + if (Opcode == OPC_EmitCopyToReg2) + DestPhysReg |= MatcherTable[MatcherIndex++] << 8; + + if (!InputChain.getNode()) + InputChain = CurDAG->getEntryNode(); + + InputChain = CurDAG->getCopyToReg(InputChain, SDLoc(NodeToMatch), + DestPhysReg, RecordedNodes[RecNo].first, + InputGlue); + + InputGlue = InputChain.getValue(1); + continue; + } + + case OPC_EmitNodeXForm: { + unsigned XFormNo = MatcherTable[MatcherIndex++]; + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm"); + SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo); + RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, nullptr)); + continue; + } + case OPC_Coverage: { + // This is emitted right before MorphNode/EmitNode. + // So it should be safe to assume that this node has been selected + unsigned index = MatcherTable[MatcherIndex++]; + index |= (MatcherTable[MatcherIndex++] << 8); + dbgs() << "COVERED: " << getPatternForIndex(index) << "\n"; + dbgs() << "INCLUDED: " << getIncludePathForIndex(index) << "\n"; + continue; + } + + case OPC_EmitNode: case OPC_MorphNodeTo: + case OPC_EmitNode0: case OPC_EmitNode1: case OPC_EmitNode2: + case OPC_MorphNodeTo0: case OPC_MorphNodeTo1: case OPC_MorphNodeTo2: { + uint16_t TargetOpc = MatcherTable[MatcherIndex++]; + TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + unsigned EmitNodeInfo = MatcherTable[MatcherIndex++]; + // Get the result VT list. + unsigned NumVTs; + // If this is one of the compressed forms, get the number of VTs based + // on the Opcode. Otherwise read the next byte from the table. + if (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2) + NumVTs = Opcode - OPC_MorphNodeTo0; + else if (Opcode >= OPC_EmitNode0 && Opcode <= OPC_EmitNode2) + NumVTs = Opcode - OPC_EmitNode0; + else + NumVTs = MatcherTable[MatcherIndex++]; + SmallVector<EVT, 4> VTs; + for (unsigned i = 0; i != NumVTs; ++i) { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (VT == MVT::iPTR) + VT = TLI->getPointerTy(CurDAG->getDataLayout()).SimpleTy; + VTs.push_back(VT); + } + + if (EmitNodeInfo & OPFL_Chain) + VTs.push_back(MVT::Other); + if (EmitNodeInfo & OPFL_GlueOutput) + VTs.push_back(MVT::Glue); + + // This is hot code, so optimize the two most common cases of 1 and 2 + // results. + SDVTList VTList; + if (VTs.size() == 1) + VTList = CurDAG->getVTList(VTs[0]); + else if (VTs.size() == 2) + VTList = CurDAG->getVTList(VTs[0], VTs[1]); + else + VTList = CurDAG->getVTList(VTs); + + // Get the operand list. + unsigned NumOps = MatcherTable[MatcherIndex++]; + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i != NumOps; ++i) { + unsigned RecNo = MatcherTable[MatcherIndex++]; + if (RecNo & 128) + RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); + + assert(RecNo < RecordedNodes.size() && "Invalid EmitNode"); + Ops.push_back(RecordedNodes[RecNo].first); + } + + // If there are variadic operands to add, handle them now. 
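+ // (For example, a variadic call-like pattern carries a variable number of register operands; those trailing operands are copied from the matched node itself rather than from the table.)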
+ if (EmitNodeInfo & OPFL_VariadicInfo) { + // Determine the start index to copy from. + unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo); + FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0; + assert(NodeToMatch->getNumOperands() >= FirstOpToCopy && + "Invalid variadic node"); + // Copy all of the variadic operands, not including a potential glue + // input. + for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands(); + i != e; ++i) { + SDValue V = NodeToMatch->getOperand(i); + if (V.getValueType() == MVT::Glue) break; + Ops.push_back(V); + } + } + + // If this has chain/glue inputs, add them. + if (EmitNodeInfo & OPFL_Chain) + Ops.push_back(InputChain); + if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr) + Ops.push_back(InputGlue); + + // Create the node. + MachineSDNode *Res = nullptr; + bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo || + (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2); + if (!IsMorphNodeTo) { + // If this is a normal EmitNode command, just create the new node and + // add the results to the RecordedNodes list. + Res = CurDAG->getMachineNode(TargetOpc, SDLoc(NodeToMatch), + VTList, Ops); + + // Add all the non-glue/non-chain results to the RecordedNodes list. + for (unsigned i = 0, e = VTs.size(); i != e; ++i) { + if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break; + RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i), + nullptr)); + } + } else { + assert(NodeToMatch->getOpcode() != ISD::DELETED_NODE && + "NodeToMatch was removed partway through selection"); + SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N, + SDNode *E) { + CurDAG->salvageDebugInfo(*N); + auto &Chain = ChainNodesMatched; + assert((!E || !is_contained(Chain, N)) && + "Chain node replaced during MorphNode"); + Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end()); + }); + Res = cast<MachineSDNode>(MorphNode(NodeToMatch, TargetOpc, VTList, + Ops, EmitNodeInfo)); + } + + // If the node had chain/glue results, update our notion of the current + // chain and glue. + if (EmitNodeInfo & OPFL_GlueOutput) { + InputGlue = SDValue(Res, VTs.size()-1); + if (EmitNodeInfo & OPFL_Chain) + InputChain = SDValue(Res, VTs.size()-2); + } else if (EmitNodeInfo & OPFL_Chain) + InputChain = SDValue(Res, VTs.size()-1); + + // If the OPFL_MemRefs flag is set on this node, slap all of the + // accumulated memrefs onto it. + // + // FIXME: This is vastly incorrect for patterns with multiple output + // instructions that access memory and for ComplexPatterns that match + // loads. + if (EmitNodeInfo & OPFL_MemRefs) { + // Only attach load or store memory operands if the generated + // instruction may load or store. + const MCInstrDesc &MCID = TII->get(TargetOpc); + bool mayLoad = MCID.mayLoad(); + bool mayStore = MCID.mayStore(); + + // We expect to have relatively few of these so just filter them into a + // temporary buffer so that we can easily add them to the instruction. + SmallVector<MachineMemOperand *, 4> FilteredMemRefs; + for (MachineMemOperand *MMO : MatchedMemRefs) { + if (MMO->isLoad()) { + if (mayLoad) + FilteredMemRefs.push_back(MMO); + } else if (MMO->isStore()) { + if (mayStore) + FilteredMemRefs.push_back(MMO); + } else { + FilteredMemRefs.push_back(MMO); + } + } + + CurDAG->setNodeMemRefs(Res, FilteredMemRefs); + } + + LLVM_DEBUG(if (!MatchedMemRefs.empty() && Res->memoperands_empty()) dbgs() + << " Dropping mem operands\n"; + dbgs() << " " << (IsMorphNodeTo ? 
"Morphed" : "Created") + << " node: "; + Res->dump(CurDAG);); + + // If this was a MorphNodeTo then we're completely done! + if (IsMorphNodeTo) { + // Update chain uses. + UpdateChains(Res, InputChain, ChainNodesMatched, true); + return; + } + continue; + } + + case OPC_CompleteMatch: { + // The match has been completed, and any new nodes (if any) have been + // created. Patch up references to the matched dag to use the newly + // created nodes. + unsigned NumResults = MatcherTable[MatcherIndex++]; + + for (unsigned i = 0; i != NumResults; ++i) { + unsigned ResSlot = MatcherTable[MatcherIndex++]; + if (ResSlot & 128) + ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex); + + assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch"); + SDValue Res = RecordedNodes[ResSlot].first; + + assert(i < NodeToMatch->getNumValues() && + NodeToMatch->getValueType(i) != MVT::Other && + NodeToMatch->getValueType(i) != MVT::Glue && + "Invalid number of results to complete!"); + assert((NodeToMatch->getValueType(i) == Res.getValueType() || + NodeToMatch->getValueType(i) == MVT::iPTR || + Res.getValueType() == MVT::iPTR || + NodeToMatch->getValueType(i).getSizeInBits() == + Res.getValueSizeInBits()) && + "invalid replacement"); + ReplaceUses(SDValue(NodeToMatch, i), Res); + } + + // Update chain uses. + UpdateChains(NodeToMatch, InputChain, ChainNodesMatched, false); + + // If the root node defines glue, we need to update it to the glue result. + // TODO: This never happens in our tests and I think it can be removed / + // replaced with an assert, but if we do it this the way the change is + // NFC. + if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) == + MVT::Glue && + InputGlue.getNode()) + ReplaceUses(SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), + InputGlue); + + assert(NodeToMatch->use_empty() && + "Didn't replace all uses of the node?"); + CurDAG->RemoveDeadNode(NodeToMatch); + + return; + } + } + + // If the code reached this point, then the match failed. See if there is + // another child to try in the current 'Scope', otherwise pop it until we + // find a case to check. + LLVM_DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex + << "\n"); + ++NumDAGIselRetries; + while (true) { + if (MatchScopes.empty()) { + CannotYetSelect(NodeToMatch); + return; + } + + // Restore the interpreter state back to the point where the scope was + // formed. + MatchScope &LastScope = MatchScopes.back(); + RecordedNodes.resize(LastScope.NumRecordedNodes); + NodeStack.clear(); + NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end()); + N = NodeStack.back(); + + if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size()) + MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); + MatcherIndex = LastScope.FailIndex; + + LLVM_DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n"); + + InputChain = LastScope.InputChain; + InputGlue = LastScope.InputGlue; + if (!LastScope.HasChainNodesMatched) + ChainNodesMatched.clear(); + + // Check to see what the offset is at the new MatcherIndex. If it is zero + // we have reached the end of this scope, otherwise we have another child + // in the current scope to try. + unsigned NumToSkip = MatcherTable[MatcherIndex++]; + if (NumToSkip & 128) + NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); + + // If we have another child in this scope to match, update FailIndex and + // try it. 
+ if (NumToSkip != 0) { + LastScope.FailIndex = MatcherIndex+NumToSkip; + break; + } + + // End of this scope, pop it and try the next child in the containing + // scope. + MatchScopes.pop_back(); + } + } +} + +bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const { + assert(N->getOpcode() == ISD::OR && "Unexpected opcode"); + auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!C) + return false; + + // Detect when "or" is used to add an offset to a stack object. + if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) { + MachineFrameInfo &MFI = MF->getFrameInfo(); + unsigned A = MFI.getObjectAlignment(FN->getIndex()); + assert(isPowerOf2_32(A) && "Unexpected alignment"); + int32_t Off = C->getSExtValue(); + // If the alleged offset fits in the zero bits guaranteed by + // the alignment, then this or is really an add. + return (Off >= 0) && (((A - 1) & Off) == unsigned(Off)); + } + return false; +} + +void SelectionDAGISel::CannotYetSelect(SDNode *N) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Cannot select: "; + + if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN && + N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && + N->getOpcode() != ISD::INTRINSIC_VOID) { + N->printrFull(Msg, CurDAG); + Msg << "\nIn function: " << MF->getName(); + } else { + bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; + unsigned iid = + cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue(); + if (iid < Intrinsic::num_intrinsics) + Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid, None); + else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo()) + Msg << "target intrinsic %" << TII->getName(iid); + else + Msg << "unknown intrinsic #" << iid; + } + report_fatal_error(Msg.str()); +} + +char SelectionDAGISel::ID = 0; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp new file mode 100644 index 0000000000000..cdc09d59f6a49 --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -0,0 +1,304 @@ +//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::viewGraph method. 
+// +//===----------------------------------------------------------------------===// + +#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +#define DEBUG_TYPE "dag-printer" + +namespace llvm { + template<> + struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits { + + explicit DOTGraphTraits(bool isSimple=false) : + DefaultDOTGraphTraits(isSimple) {} + + static bool hasEdgeDestLabels() { + return true; + } + + static unsigned numEdgeDestLabels(const void *Node) { + return ((const SDNode *) Node)->getNumValues(); + } + + static std::string getEdgeDestLabel(const void *Node, unsigned i) { + return ((const SDNode *) Node)->getValueType(i).getEVTString(); + } + + template<typename EdgeIter> + static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) { + return itostr(I - SDNodeIterator::begin((const SDNode *) Node)); + } + + /// edgeTargetsEdgeSource - This method returns true if this outgoing edge + /// should actually target another edge source, not a node. If this method + /// is implemented, getEdgeTarget should be implemented. + template<typename EdgeIter> + static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) { + return true; + } + + /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is + /// called to determine which outgoing edge of Node is the target of this + /// edge. + template<typename EdgeIter> + static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) { + SDNode *TargetNode = *I; + SDNodeIterator NI = SDNodeIterator::begin(TargetNode); + std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo()); + return NI; + } + + static std::string getGraphName(const SelectionDAG *G) { + return G->getMachineFunction().getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static std::string getNodeIdentifierLabel(const SDNode *Node, + const SelectionDAG *Graph) { + std::string R; + raw_string_ostream OS(R); +#ifndef NDEBUG + OS << 't' << Node->PersistentId; +#else + OS << static_cast<const void *>(Node); +#endif + return R; + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. 
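+ /// The default implementation below colors glue edges bold red and chain edges dashed blue; ordinary value edges get no special styling.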
+ template<typename EdgeIter> + static std::string getEdgeAttributes(const void *Node, EdgeIter EI, + const SelectionDAG *Graph) { + SDValue Op = EI.getNode()->getOperand(EI.getOperand()); + EVT VT = Op.getValueType(); + if (VT == MVT::Glue) + return "color=red,style=bold"; + else if (VT == MVT::Other) + return "color=blue,style=dashed"; + return ""; + } + + + static std::string getSimpleNodeLabel(const SDNode *Node, + const SelectionDAG *G) { + std::string Result = Node->getOperationName(G); + { + raw_string_ostream OS(Result); + Node->print_details(OS, G); + } + return Result; + } + std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph); + static std::string getNodeAttributes(const SDNode *N, + const SelectionDAG *Graph) { +#ifndef NDEBUG + const std::string &Attrs = Graph->getGraphAttrs(N); + if (!Attrs.empty()) { + if (Attrs.find("shape=") == std::string::npos) + return std::string("shape=Mrecord,") + Attrs; + else + return Attrs; + } +#endif + return "shape=Mrecord"; + } + + static void addCustomGraphFeatures(SelectionDAG *G, + GraphWriter<SelectionDAG*> &GW) { + GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot"); + if (G->getRoot().getNode()) + GW.emitEdge(nullptr, -1, G->getRoot().getNode(), G->getRoot().getResNo(), + "color=blue,style=dashed"); + } + }; +} + +std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, + const SelectionDAG *G) { + return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G); +} + + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. +/// +void SelectionDAG::viewGraph(const std::string &Title) { +// This code is only for debugging! +#ifndef NDEBUG + ViewGraph(this, "dag." + getMachineFunction().getName(), + false, Title); +#else + errs() << "SelectionDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +// This overload is defined out-of-line here instead of just using a +// default parameter because this is easiest for gdb to call. +void SelectionDAG::viewGraph() { + viewGraph(""); +} + +/// clearGraphAttrs - Clear all previously defined node graph attributes. +/// Intended to be used from a debugging tool (eg. gdb). +void SelectionDAG::clearGraphAttrs() { +#ifndef NDEBUG + NodeGraphAttrs.clear(); +#else + errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + + +/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".) +/// +void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) { +#ifndef NDEBUG + NodeGraphAttrs[N] = Attrs; +#else + errs() << "SelectionDAG::setGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + + +/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".) +/// Used from getNodeAttributes. +const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { +#ifndef NDEBUG + std::map<const SDNode *, std::string>::const_iterator I = + NodeGraphAttrs.find(N); + + if (I != NodeGraphAttrs.end()) + return I->second; + else + return ""; +#else + errs() << "SelectionDAG::getGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; + return std::string(); +#endif +} + +/// setGraphColor - Convenience for setting node color attribute. 
+/// +void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) { +#ifndef NDEBUG + NodeGraphAttrs[N] = std::string("color=") + Color; +#else + errs() << "SelectionDAG::setGraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + +/// setSubgraphColorHelper - Implement setSubgraphColor. Return +/// whether we truncated the search. +/// +bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet<SDNode *> &visited, + int level, bool &printed) { + bool hit_limit = false; + +#ifndef NDEBUG + if (level >= 20) { + if (!printed) { + printed = true; + LLVM_DEBUG(dbgs() << "setSubgraphColor hit max level\n"); + } + return true; + } + + unsigned oldSize = visited.size(); + visited.insert(N); + if (visited.size() != oldSize) { + setGraphColor(N, Color); + for(SDNodeIterator i = SDNodeIterator::begin(N), iend = SDNodeIterator::end(N); + i != iend; + ++i) { + hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1, printed) || hit_limit; + } + } +#else + errs() << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif + return hit_limit; +} + +/// setSubgraphColor - Convenience for setting subgraph color attribute. +/// +void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) { +#ifndef NDEBUG + DenseSet<SDNode *> visited; + bool printed = false; + if (setSubgraphColorHelper(N, Color, visited, 0, printed)) { + // Visually mark that we hit the limit + if (strcmp(Color, "red") == 0) { + setSubgraphColorHelper(N, "blue", visited, 0, printed); + } else if (strcmp(Color, "yellow") == 0) { + setSubgraphColorHelper(N, "green", visited, 0, printed); + } + } + +#else + errs() << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + +std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { + std::string s; + raw_string_ostream O(s); + O << "SU(" << SU->NodeNum << "): "; + if (SU->getNode()) { + SmallVector<SDNode *, 4> GluedNodes; + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + GluedNodes.push_back(N); + while (!GluedNodes.empty()) { + O << DOTGraphTraits<SelectionDAG*> + ::getSimpleNodeLabel(GluedNodes.back(), DAG); + GluedNodes.pop_back(); + if (!GluedNodes.empty()) + O << "\n "; + } + } else { + O << "CROSS RC COPY"; + } + return O.str(); +} + +void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const { + if (DAG) { + // Draw a special "GraphRoot" node to indicate the root of the graph. + GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot"); + const SDNode *N = DAG->getRoot().getNode(); + if (N && N->getNodeId() != -1) + GW.emitEdge(nullptr, -1, &SUnits[N->getNodeId()], -1, + "color=blue,style=dashed"); + } +} diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp new file mode 100644 index 0000000000000..3a2df6f60593a --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp @@ -0,0 +1,17 @@ +//===- SelectionDAGTargetInfo.cpp - SelectionDAG Info ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAGTargetInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +using namespace llvm; + +SelectionDAGTargetInfo::~SelectionDAGTargetInfo() = default; diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp new file mode 100644 index 0000000000000..fad98b6f50dc1 --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -0,0 +1,1067 @@ +//===- StatepointLowering.cpp - SDAGBuilder's statepoint code -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file includes support code used by SelectionDAGBuilder when lowering a +// statepoint sequence in SelectionDAG IR. +// +//===----------------------------------------------------------------------===// + +#include "StatepointLowering.h" +#include "SelectionDAGBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/MachineValueType.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <tuple> +#include <utility> + +using namespace llvm; + +#define DEBUG_TYPE "statepoint-lowering" + +STATISTIC(NumSlotsAllocatedForStatepoints, + "Number of stack slots allocated for statepoints"); +STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered"); +STATISTIC(StatepointMaxSlotsRequired, + "Maximum number of stack slots required for a single statepoint"); + +static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops, + SelectionDAGBuilder &Builder, uint64_t Value) { + SDLoc L = Builder.getCurSDLoc(); + Ops.push_back(Builder.DAG.getTargetConstant(StackMaps::ConstantOp, L, + MVT::i64)); + Ops.push_back(Builder.DAG.getTargetConstant(Value, L, MVT::i64)); +} + +void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) { + // Consistency check + assert(PendingGCRelocateCalls.empty() && + "Trying to visit statepoint before finished processing previous one"); + Locations.clear(); + NextSlotToAllocate = 0; + // Need to resize this on each safepoint - we need the two to stay in sync and + // the clear patterns of a SelectionDAGBuilder have no relation to + // FunctionLoweringInfo.
Also need to ensure used bits get cleared. + AllocatedStackSlots.clear(); + AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size()); +} + +void StatepointLoweringState::clear() { + Locations.clear(); + AllocatedStackSlots.clear(); + assert(PendingGCRelocateCalls.empty() && + "cleared before statepoint sequence completed"); +} + +SDValue +StatepointLoweringState::allocateStackSlot(EVT ValueType, + SelectionDAGBuilder &Builder) { + NumSlotsAllocatedForStatepoints++; + MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); + + unsigned SpillSize = ValueType.getStoreSize(); + assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?"); + + // First look for a previously created stack slot which is not in + // use (accounting for the fact arbitrary slots may already be + // reserved), falling back to creating a new stack slot if needed. + + const size_t NumSlots = AllocatedStackSlots.size(); + assert(NextSlotToAllocate <= NumSlots && "Broken invariant"); + + assert(AllocatedStackSlots.size() == + Builder.FuncInfo.StatepointStackSlots.size() && + "Broken invariant"); + + for (; NextSlotToAllocate < NumSlots; NextSlotToAllocate++) { + if (!AllocatedStackSlots.test(NextSlotToAllocate)) { + const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate]; + if (MFI.getObjectSize(FI) == SpillSize) { + AllocatedStackSlots.set(NextSlotToAllocate); + // TODO: Is ValueType the right thing to use here? + return Builder.DAG.getFrameIndex(FI, ValueType); + } + } + } + + // Couldn't find a free slot, so create a new one: + + SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType); + const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); + MFI.markAsStatepointSpillSlotObjectIndex(FI); + + Builder.FuncInfo.StatepointStackSlots.push_back(FI); + AllocatedStackSlots.resize(AllocatedStackSlots.size()+1, true); + assert(AllocatedStackSlots.size() == + Builder.FuncInfo.StatepointStackSlots.size() && + "Broken invariant"); + + StatepointMaxSlotsRequired.updateMax( + Builder.FuncInfo.StatepointStackSlots.size()); + + return SpillSlot; +} + +/// Utility function for reservePreviousStackSlotForValue. Tries to find the +/// stack slot index to which we have spilled a value for previous statepoints. +/// LookUpDepth specifies the maximum DFS depth this function is allowed to look. +static Optional<int> findPreviousSpillSlot(const Value *Val, + SelectionDAGBuilder &Builder, + int LookUpDepth) { + // Cannot look any further - give up now + if (LookUpDepth <= 0) + return None; + + // Spill location is known for gc relocates + if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) { + const auto &SpillMap = + Builder.FuncInfo.StatepointSpillMaps[Relocate->getStatepoint()]; + + auto It = SpillMap.find(Relocate->getDerivedPtr()); + if (It == SpillMap.end()) + return None; + + return It->second; + } + + // Look through bitcast instructions. + if (const BitCastInst *Cast = dyn_cast<BitCastInst>(Val)) + return findPreviousSpillSlot(Cast->getOperand(0), Builder, LookUpDepth - 1); + + // Look through phi nodes. + // All incoming values should have the same known stack slot; otherwise the + // result is unknown.
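// A standalone sketch of this merge rule (C++17, independent of the LLVM
// types used here; mergeSpillSlots is a hypothetical name):
//
//   #include <optional>
//   #include <vector>
//
//   std::optional<int>
//   mergeSpillSlots(const std::vector<std::optional<int>> &Incoming) {
//     std::optional<int> Merged;
//     for (const std::optional<int> &Slot : Incoming) {
//       if (!Slot)
//         return std::nullopt;        // Unknown incoming slot: give up.
//       if (Merged && *Merged != *Slot)
//         return std::nullopt;        // Conflicting slots: give up.
//       Merged = Slot;                // First, or agreeing, slot.
//     }
//     return Merged;
//   }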
+ if (const PHINode *Phi = dyn_cast<PHINode>(Val)) { + Optional<int> MergedResult = None; + + for (auto &IncomingValue : Phi->incoming_values()) { + Optional<int> SpillSlot = + findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1); + if (!SpillSlot.hasValue()) + return None; + + if (MergedResult.hasValue() && *MergedResult != *SpillSlot) + return None; + + MergedResult = SpillSlot; + } + return MergedResult; + } + + // TODO: We can do better for PHI nodes. In cases like this: + // ptr = phi(relocated_pointer, not_relocated_pointer) + // statepoint(ptr) + // We will report that the stack slot for ptr is unknown. And later we might + // assign different stack slots for ptr and relocated_pointer. This limits + // llvm's ability to remove redundant stores. + // Unfortunately it's hard to accomplish with the current infrastructure. + // We use this function to eliminate the spill store completely; in the + // example above we would still need to emit the store, but to a specific + // "preferred" location instead of an arbitrary one. + + // TODO: handle simple updates. If a value is modified and the original + // value is no longer live, it would be nice to put the modified value in the + // same slot. This allows folding of the memory accesses for some + // instruction types (like an increment). + // statepoint (i) + // i1 = i+1 + // statepoint (i1) + // However we need to be careful for cases like this: + // statepoint(i) + // i1 = i+1 + // statepoint(i, i1) + // Here we want to reserve spill slot for 'i', but not for 'i+1'. If we just + // put handling of simple modifications in this function like it's done + // for bitcasts we might end up reserving i's slot for 'i+1' because the order + // in which we visit values is unspecified. + + // We don't know anything about this instruction + return None; +} + +/// Try to find existing copies of the incoming values in stack slots used for +/// statepoint spilling. If we can find a spill slot for the incoming value, +/// mark that slot as allocated, and reuse the same slot for this safepoint. +/// This helps to avoid a series of loads and stores that only serve to +/// reshuffle values on the stack between calls. +static void reservePreviousStackSlotForValue(const Value *IncomingValue, + SelectionDAGBuilder &Builder) { + SDValue Incoming = Builder.getValue(IncomingValue); + + if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) { + // We won't need to spill this, so no need to check for previously + // allocated stack slots + return; + } + + SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming); + if (OldLocation.getNode()) + // Duplicates in input + return; + + const int LookUpDepth = 6; + Optional<int> Index = + findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth); + if (!Index.hasValue()) + return; + + const auto &StatepointSlots = Builder.FuncInfo.StatepointStackSlots; + + auto SlotIt = find(StatepointSlots, *Index); + assert(SlotIt != StatepointSlots.end() && + "Value spilled to an unknown stack slot"); + + // This is one of our dedicated lowering slots + const int Offset = std::distance(StatepointSlots.begin(), SlotIt); + if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) { + // stack slot already assigned to someone else, can't use it! + // TODO: currently we reserve space for gc arguments after doing + // normal allocation for deopt arguments. We should reserve for + // _all_ deopt and gc arguments, then start allocating.
This + // will prevent some moves being inserted when vm state changes, + // but gc state doesn't between two calls. + return; + } + // Reserve this stack slot + Builder.StatepointLowering.reserveStackSlot(Offset); + + // Cache this slot so we find it when going through the normal + // assignment loop. + SDValue Loc = + Builder.DAG.getTargetFrameIndex(*Index, Builder.getFrameIndexTy()); + Builder.StatepointLowering.setLocation(Incoming, Loc); +} + +/// Remove any duplicates (as SDValues) from the derived pointer pairs. This +/// is not required for correctness. Its purpose is to reduce the size of the +/// StackMap section. It has no effect on the number of spill slots required +/// or the actual lowering. +static void +removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases, + SmallVectorImpl<const Value *> &Ptrs, + SmallVectorImpl<const GCRelocateInst *> &Relocs, + SelectionDAGBuilder &Builder, + FunctionLoweringInfo::StatepointSpillMap &SSM) { + DenseMap<SDValue, const Value *> Seen; + + SmallVector<const Value *, 64> NewBases, NewPtrs; + SmallVector<const GCRelocateInst *, 64> NewRelocs; + for (size_t i = 0, e = Ptrs.size(); i < e; i++) { + SDValue SD = Builder.getValue(Ptrs[i]); + auto SeenIt = Seen.find(SD); + + if (SeenIt == Seen.end()) { + // Only add non-duplicates + NewBases.push_back(Bases[i]); + NewPtrs.push_back(Ptrs[i]); + NewRelocs.push_back(Relocs[i]); + Seen[SD] = Ptrs[i]; + } else { + // Duplicate pointer found, note in SSM and move on: + SSM.DuplicateMap[Ptrs[i]] = SeenIt->second; + } + } + assert(Bases.size() >= NewBases.size()); + assert(Ptrs.size() >= NewPtrs.size()); + assert(Relocs.size() >= NewRelocs.size()); + Bases = NewBases; + Ptrs = NewPtrs; + Relocs = NewRelocs; + assert(Ptrs.size() == Bases.size()); + assert(Ptrs.size() == Relocs.size()); +} + +/// Extract the call from the statepoint, lower it, and return a pointer to +/// the call node. Also update NodeMap so that getValue(statepoint) will +/// reference the lowered call result. +static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo( + SelectionDAGBuilder::StatepointLoweringInfo &SI, + SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) { + SDValue ReturnValue, CallEndVal; + std::tie(ReturnValue, CallEndVal) = + Builder.lowerInvokable(SI.CLI, SI.EHPadBB); + SDNode *CallEnd = CallEndVal.getNode(); + + // Get a call instruction from the call sequence chain. Tail calls are not + // allowed. The following code is essentially reverse engineering X86's + // LowerCallTo. + // + // We are expecting the DAG to have the following form: + // + // ch = eh_label (only in case of invoke statepoint) + // ch, glue = callseq_start ch + // ch, glue = X86::Call ch, glue + // ch, glue = callseq_end ch, glue + // get_return_value ch, glue + // + // get_return_value can either be a sequence of CopyFromReg instructions + // to grab the return value from the return register(s), or it can be a LOAD + // to load a value returned by reference via a stack slot.
+ + bool HasDef = !SI.CLI.RetTy->isVoidTy(); + if (HasDef) { + if (CallEnd->getOpcode() == ISD::LOAD) + CallEnd = CallEnd->getOperand(0).getNode(); + else + while (CallEnd->getOpcode() == ISD::CopyFromReg) + CallEnd = CallEnd->getOperand(0).getNode(); + } + + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!"); + return std::make_pair(ReturnValue, CallEnd->getOperand(0).getNode()); +} + +static MachineMemOperand* getMachineMemOperand(MachineFunction &MF, + FrameIndexSDNode &FI) { + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI.getIndex()); + auto MMOFlags = MachineMemOperand::MOStore | + MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; + auto &MFI = MF.getFrameInfo(); + return MF.getMachineMemOperand(PtrInfo, MMOFlags, + MFI.getObjectSize(FI.getIndex()), + MFI.getObjectAlignment(FI.getIndex())); +} + +/// Spill a value incoming to the statepoint. It might be either part of the +/// vmstate or the gcstate. In both cases unconditionally spill it on the stack +/// unless it is a null constant. Return a tuple whose first element is the +/// frame index holding the saved value, whose second element is the outgoing +/// chain from the emitted store, and whose third element is the memory operand +/// for the spill (null if no store was emitted). +static std::tuple<SDValue, SDValue, MachineMemOperand*> +spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, + SelectionDAGBuilder &Builder) { + SDValue Loc = Builder.StatepointLowering.getLocation(Incoming); + MachineMemOperand* MMO = nullptr; + + // Emit new store if we didn't do it for this ptr before + if (!Loc.getNode()) { + Loc = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(), + Builder); + int Index = cast<FrameIndexSDNode>(Loc)->getIndex(); + // We use TargetFrameIndex so that isel will not select it into LEA + Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy()); + + // Right now we always allocate spill slots that are of the same + // size as the value we're about to spill (the size of the spillee can + // vary since we spill vectors of pointers too). At some point we + // can consider allowing spills of smaller values to larger slots + // (i.e. change the '==' in the assert below to a '>='). + MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); + assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() && + "Bad spill: stack slot does not match!"); + + // Note: Using the alignment of the spill slot (rather than the abi or + // preferred alignment) is required for correctness when dealing with spill + // slots with preferred alignments larger than frame alignment. + auto &MF = Builder.DAG.getMachineFunction(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); + auto *StoreMMO = + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + MFI.getObjectSize(Index), + MFI.getObjectAlignment(Index)); + Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, + StoreMMO); + + MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc)); + + Builder.StatepointLowering.setLocation(Incoming, Loc); + } + + assert(Loc.getNode()); + return std::make_tuple(Loc, Chain, MMO); +} + +/// Lower a single value incoming to a statepoint node. This value can be +/// either a deopt value or a gc value; the handling is the same. We special +/// case constants and allocas, then fall back to spilling if required.
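/// In short, the strategy implemented below is (an illustrative summary):
///   constant            -> ConstantOp-encoded immediate in the stackmap
///   alloca (FrameIndex) -> TargetFrameIndex operand
///   live-in value       -> operand passed directly; the register allocator
///                          picks its final location
///   anything else       -> spilled to a statepoint stack slot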
+static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly, + SmallVectorImpl<SDValue> &Ops, + SmallVectorImpl<MachineMemOperand*> &MemRefs, + SelectionDAGBuilder &Builder) { + // Note: We know all of these spills are independent, but don't bother to + // exploit that chain-wise. DAGCombine will happily do so as needed, so + // doing it here would be a small compile time win at most. + SDValue Chain = Builder.getRoot(); + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) { + // If the original value was a constant, make sure it gets recorded as + // such in the stackmap. This is required so that the consumer can + // parse any internal format to the deopt state. It also handles null + // pointers and other constant pointers in GC states. Note that constant + // vectors do not appear to actually hit this path and that anything larger + // than an i64 value (not type!) will fail asserts here. + pushStackMapConstant(Ops, Builder, C->getSExtValue()); + } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { + // This handles allocas as arguments to the statepoint (this is only + // really meaningful for a deopt value. For GC, we'd be trying to + // relocate the address of the alloca itself?) + assert(Incoming.getValueType() == Builder.getFrameIndexTy() && + "Incoming value is a frame index!"); + Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), + Builder.getFrameIndexTy())); + + auto &MF = Builder.DAG.getMachineFunction(); + auto *MMO = getMachineMemOperand(MF, *FI); + MemRefs.push_back(MMO); + + } else if (LiveInOnly) { + // If this value is live in (not live-on-return, or live-through), we can + // treat it the same way patchpoint treats its "live in" values. We'll + // end up folding some of these into stack references, but they'll be + // handled by the register allocator. Note that we do not have the notion + // of a late use so these values might be placed in registers which are + // clobbered by the call. This is fine for live-in. + Ops.push_back(Incoming); + } else { + // Otherwise, locate a spill slot and explicitly spill it so it + // can be found by the runtime later. We currently do not support + // tracking values through callee saved registers to their eventual + // spill location. This would be a useful optimization, but would + // need to be optional since it requires a lot of complexity on the + // runtime side which not all runtimes would support. + auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder); + Ops.push_back(std::get<0>(Res)); + if (auto *MMO = std::get<2>(Res)) + MemRefs.push_back(MMO); + Chain = std::get<1>(Res); + } + + Builder.DAG.setRoot(Chain); +} + +/// Lower deopt state and gc pointer arguments of the statepoint. The actual +/// lowering is described in lowerIncomingStatepointValue. This function is +/// responsible for lowering everything in the right position and playing some +/// tricks to avoid redundant stack manipulation where possible. On +/// completion, 'Ops' will contain ready-to-use operands for the machine code +/// statepoint. The chain nodes will have already been created and the DAG root +/// will be set to the last value spilled (if any were). +static void +lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, + SmallVectorImpl<MachineMemOperand*> &MemRefs, SelectionDAGBuilder::StatepointLoweringInfo &SI, + SelectionDAGBuilder &Builder) { + // Lower the deopt and gc arguments for this statepoint. Layout will be: + // deopt argument length, deopt arguments..., gc arguments...
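  // For example (an illustrative sketch): for a statepoint with two deopt
  // values d0, d1 and two base/derived pairs (b0, p0) and (b1, p1), the meta
  // arguments are emitted as:
  //   StackMaps::ConstantOp, 2,   // deopt argument length (pushStackMapConstant)
  //   d0, d1,                     // deopt arguments
  //   b0, p0, b1, p1              // gc arguments, base/derived interleaved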
+#ifndef NDEBUG + if (auto *GFI = Builder.GFI) { + // Check that each of the gc pointers and bases we've gotten out of the + // safepoint is something the strategy thinks might be a pointer (or vector + // of pointers) into the GC heap. This is basically just here to help catch + // errors during statepoint insertion. TODO: This should actually be in the + // Verifier, but we can't get to the GCStrategy from there (yet). + GCStrategy &S = GFI->getStrategy(); + for (const Value *V : SI.Bases) { + auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed base pointer found in statepoint"); + } + } + for (const Value *V : SI.Ptrs) { + auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed derived pointer found in statepoint"); + } + } + assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!"); + } else { + assert(SI.Bases.empty() && "No gc specified, so cannot relocate pointers!"); + assert(SI.Ptrs.empty() && "No gc specified, so cannot relocate pointers!"); + } +#endif + + // Figure out what lowering strategy we're going to use for each part. + // Note: It is conservatively correct to lower both "live-in" and "live-out" + // as "live-through". A "live-through" variable is one which is "live-in", + // "live-out", and live throughout the lifetime of the call (i.e. we can find + // it from any PC within the transitive callee of the statepoint). In + // particular, if the callee spills callee preserved registers we may not + // be able to find a value placed in that register during the call. This is + // fine for live-out, but not for live-through. If we were willing to make + // assumptions about the code generator producing the callee, we could + // potentially allow live-through values in callee saved registers. + const bool LiveInDeopt = + SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn; + + auto isGCValue = [&](const Value *V) { + return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V); + }; + + // Before we actually start lowering (and allocating spill slots for values), + // reserve any stack slots which we judge to be profitable to reuse for a + // particular value. This is purely an optimization over the code below and + // doesn't change semantics at all. It is important for performance that we + // reserve slots for both deopt and gc values before lowering either. + for (const Value *V : SI.DeoptState) { + if (!LiveInDeopt || isGCValue(V)) + reservePreviousStackSlotForValue(V, Builder); + } + for (unsigned i = 0; i < SI.Bases.size(); ++i) { + reservePreviousStackSlotForValue(SI.Bases[i], Builder); + reservePreviousStackSlotForValue(SI.Ptrs[i], Builder); + } + + // First, prefix the list with the number of unique values to be + // lowered. Note that this is the number of *Values* not the + // number of SDValues required to lower them. + const int NumVMSArgs = SI.DeoptState.size(); + pushStackMapConstant(Ops, Builder, NumVMSArgs); + + // The vm state arguments are lowered in an opaque manner. We do not know + // what type of values are contained within. + for (const Value *V : SI.DeoptState) { + SDValue Incoming; + // If this is a function argument at a static frame index, generate it as + // the frame index.
+ if (const Argument *Arg = dyn_cast<Argument>(V)) { + int FI = Builder.FuncInfo.getArgumentFrameIndex(Arg); + if (FI != INT_MAX) + Incoming = Builder.DAG.getFrameIndex(FI, Builder.getFrameIndexTy()); + } + if (!Incoming.getNode()) + Incoming = Builder.getValue(V); + const bool LiveInValue = LiveInDeopt && !isGCValue(V); + lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, MemRefs, Builder); + } + + // Finally, go ahead and lower all the gc arguments. There's no prefixed + // length for this one. After lowering, we'll have the base and pointer + // arrays interwoven, with each (lowered) base pointer immediately followed + // by its (lowered) derived pointer, i.e. + // (base[0], ptr[0], base[1], ptr[1], ...) + for (unsigned i = 0; i < SI.Bases.size(); ++i) { + const Value *Base = SI.Bases[i]; + lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false, + Ops, MemRefs, Builder); + + const Value *Ptr = SI.Ptrs[i]; + lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false, + Ops, MemRefs, Builder); + } + + // If there are any explicit spill slots passed to the statepoint, record + // them, but otherwise do not do anything special. These are user provided + // allocas and give control over placement to the consumer. In this case, + // it is the contents of the slot which may get updated, not the pointer to + // the alloca. + for (Value *V : SI.GCArgs) { + SDValue Incoming = Builder.getValue(V); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { + // This handles allocas as arguments to the statepoint + assert(Incoming.getValueType() == Builder.getFrameIndexTy() && + "Incoming value is a frame index!"); + Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), + Builder.getFrameIndexTy())); + + auto &MF = Builder.DAG.getMachineFunction(); + auto *MMO = getMachineMemOperand(MF, *FI); + MemRefs.push_back(MMO); + } + } + + // Record computed locations for all lowered values. + // This cannot be embedded in the lowering loops, as we need to record *all* + // values, while the previous loops account only for values with unique + // SDValues. + const Instruction *StatepointInstr = SI.StatepointInstr; + auto &SpillMap = Builder.FuncInfo.StatepointSpillMaps[StatepointInstr]; + + for (const GCRelocateInst *Relocate : SI.GCRelocates) { + const Value *V = Relocate->getDerivedPtr(); + SDValue SDV = Builder.getValue(V); + SDValue Loc = Builder.StatepointLowering.getLocation(SDV); + + if (Loc.getNode()) { + SpillMap.SlotMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); + } else { + // Record value as visited, but not spilled. This is the case for allocas + // and constants. For these values we can avoid emitting a spill load + // while visiting the corresponding gc_relocate. + // Actually we do not need to record them in this map at all. + // We do this only to check that we are not relocating any unvisited + // value. + SpillMap.SlotMap[V] = None; + + // Default llvm mechanisms for exporting values which are used in + // different basic blocks do not work for gc relocates. + // Note that it would be incorrect to teach llvm that all relocates are + // uses of the corresponding values so that it would automatically + // export them. Relocates of spilled values do not use the original + // value.
+ if (Relocate->getParent() != StatepointInstr->getParent()) + Builder.ExportFromCurrentBlock(V); + } + } +} + +SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( + SelectionDAGBuilder::StatepointLoweringInfo &SI) { + // The basic scheme here is that information about both the original call and + // the safepoint is encoded in the CallInst. We create a temporary call and + // lower it, then reverse engineer the calling sequence. + + NumOfStatepoints++; + // Clear state + StatepointLowering.startNewStatepoint(*this); + +#ifndef NDEBUG + // We schedule gc relocates before removeDuplicateGCPtrs since we _will_ + // encounter the duplicate gc relocates we elide in removeDuplicateGCPtrs. + for (auto *Reloc : SI.GCRelocates) + if (Reloc->getParent() == SI.StatepointInstr->getParent()) + StatepointLowering.scheduleRelocCall(*Reloc); +#endif + + // Remove any redundant llvm::Values which map to the same SDValue as another + // input. Also has the effect of removing duplicates in the original + // llvm::Value input list as well. This is a useful optimization for + // reducing the size of the StackMap section. It has no other impact. + removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this, + FuncInfo.StatepointSpillMaps[SI.StatepointInstr]); + assert(SI.Bases.size() == SI.Ptrs.size() && + SI.Ptrs.size() == SI.GCRelocates.size()); + + // Lower statepoint vmstate and gcstate arguments + SmallVector<SDValue, 10> LoweredMetaArgs; + SmallVector<MachineMemOperand*, 16> MemRefs; + lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, SI, *this); + + // Now that we've emitted the spills, we need to update the root so that the + // call sequence is ordered correctly. + SI.CLI.setChain(getRoot()); + + // Get the call node; we will replace it later with the statepoint. + SDValue ReturnVal; + SDNode *CallNode; + std::tie(ReturnVal, CallNode) = + lowerCallFromStatepointLoweringInfo(SI, *this, PendingExports); + + // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END + // nodes with all the appropriate arguments and return values. + + // Call Node: Chain, Target, {Args}, RegMask, [Glue] + SDValue Chain = CallNode->getOperand(0); + + SDValue Glue; + bool CallHasIncomingGlue = CallNode->getGluedNode(); + if (CallHasIncomingGlue) { + // Glue is always the last operand + Glue = CallNode->getOperand(CallNode->getNumOperands() - 1); + } + + // Build the GC_TRANSITION_START node if necessary. + // + // The operands to the GC_TRANSITION_{START,END} nodes are laid out in the + // order in which they appear in the call to the statepoint intrinsic. If + // any of the operands is pointer-typed, that operand is immediately + // followed by a SRCVALUE for the pointer that may be used during lowering + // (e.g. to form MachinePointerInfo values for loads/stores).
+ const bool IsGCTransition = + (SI.StatepointFlags & (uint64_t)StatepointFlags::GCTransition) == + (uint64_t)StatepointFlags::GCTransition; + if (IsGCTransition) { + SmallVector<SDValue, 8> TSOps; + + // Add chain + TSOps.push_back(Chain); + + // Add GC transition arguments + for (const Value *V : SI.GCTransitionArgs) { + TSOps.push_back(getValue(V)); + if (V->getType()->isPointerTy()) + TSOps.push_back(DAG.getSrcValue(V)); + } + + // Add glue if necessary + if (CallHasIncomingGlue) + TSOps.push_back(Glue); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + SDValue GCTransitionStart = + DAG.getNode(ISD::GC_TRANSITION_START, getCurSDLoc(), NodeTys, TSOps); + + Chain = GCTransitionStart.getValue(0); + Glue = GCTransitionStart.getValue(1); + } + + // TODO: Currently, all of these operands are being marked as read/write in + // PrologEpilogInserter.cpp; we should special case the VMState arguments + // and flags to be read-only. + SmallVector<SDValue, 40> Ops; + + // Add the <id> and <numBytes> constants. + Ops.push_back(DAG.getTargetConstant(SI.ID, getCurSDLoc(), MVT::i64)); + Ops.push_back( + DAG.getTargetConstant(SI.NumPatchBytes, getCurSDLoc(), MVT::i32)); + + // Calculate and push starting position of vmstate arguments + // Get number of arguments incoming directly into call node + unsigned NumCallRegArgs = + CallNode->getNumOperands() - (CallHasIncomingGlue ? 4 : 3); + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, getCurSDLoc(), MVT::i32)); + + // Add call target + SDValue CallTarget = SDValue(CallNode->getOperand(1).getNode(), 0); + Ops.push_back(CallTarget); + + // Add call arguments + // Get position of register mask in the call + SDNode::op_iterator RegMaskIt; + if (CallHasIncomingGlue) + RegMaskIt = CallNode->op_end() - 2; + else + RegMaskIt = CallNode->op_end() - 1; + Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt); + + // Add a constant argument for the calling convention + pushStackMapConstant(Ops, *this, SI.CLI.CallConv); + + // Add a constant argument for the flags + uint64_t Flags = SI.StatepointFlags; + assert(((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0) && + "Unknown flag used"); + pushStackMapConstant(Ops, *this, Flags); + + // Insert all vmstate and gcstate arguments + Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end()); + + // Add register mask from call node + Ops.push_back(*RegMaskIt); + + // Add chain + Ops.push_back(Chain); + + // Same for the glue, but we add it only if the original call had it + if (Glue.getNode()) + Ops.push_back(Glue); + + // Compute return values. Provide a glue output since we consume one as + // input. This allows someone else to chain off us as needed. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + MachineSDNode *StatepointMCNode = + DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops); + DAG.setNodeMemRefs(StatepointMCNode, MemRefs); + + SDNode *SinkNode = StatepointMCNode; + + // Build the GC_TRANSITION_END node if necessary. + // + // See the comment above regarding GC_TRANSITION_START for the layout of + // the operands to the GC_TRANSITION_END node.
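// For reference, the operand list assembled for the STATEPOINT node above is,
// in order (an illustrative recap):
//   <id>, <numBytes>,                     // target constants (i64, i32)
//   <numCallRegArgs>,                     // target constant (i32)
//   <call target>, <call register args>,  // copied from the lowered call node
//   <calling conv>, <flags>,              // ConstantOp-encoded constants
//   <vmstate/gcstate meta arguments>,     // see lowerStatepointMetaArgs
//   <register mask>, <chain>[, <glue>]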
+ if (IsGCTransition) { + SmallVector<SDValue, 8> TEOps; + + // Add chain + TEOps.push_back(SDValue(StatepointMCNode, 0)); + + // Add GC transition arguments + for (const Value *V : SI.GCTransitionArgs) { + TEOps.push_back(getValue(V)); + if (V->getType()->isPointerTy()) + TEOps.push_back(DAG.getSrcValue(V)); + } + + // Add glue + TEOps.push_back(SDValue(StatepointMCNode, 1)); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + SDValue GCTransitionEnd = + DAG.getNode(ISD::GC_TRANSITION_END, getCurSDLoc(), NodeTys, TEOps); + + SinkNode = GCTransitionEnd.getNode(); + } + + // Replace original call + DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root + // Remove original call node + DAG.DeleteNode(CallNode); + + // DON'T set the root - under the assumption that it's already set past the + // inserted node we created. + + // TODO: A better future implementation would be to emit a single variable + // argument, variable return value STATEPOINT node here and then hook up the + // return value of each gc.relocate to the respective output of the + // previously emitted STATEPOINT value. Unfortunately, this doesn't appear + // to actually be possible today. + + return ReturnVal; +} + +void +SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, + const BasicBlock *EHPadBB /*= nullptr*/) { + assert(ISP.getCall()->getCallingConv() != CallingConv::AnyReg && + "anyregcc is not supported on statepoints!"); + +#ifndef NDEBUG + // If this is a malformed statepoint, report it early to simplify debugging. + // This should catch any IR level mistake that's made when constructing or + // transforming statepoints. + ISP.verify(); + + // Check that the associated GCStrategy expects to encounter statepoints. + assert(GFI->getStrategy().useStatepoints() && + "GCStrategy does not expect to encounter statepoints"); +#endif + + SDValue ActualCallee; + + if (ISP.getNumPatchBytes() > 0) { + // If we've been asked to emit a nop sequence instead of a call instruction + // for this statepoint then don't lower the call target, but use a constant + // `null` instead. Not lowering the call target lets statepoint clients get + // away without providing a physical address for the symbolic call target at + // link time.
+ + const auto &TLI = DAG.getTargetLoweringInfo(); + const auto &DL = DAG.getDataLayout(); + + unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace(); + ActualCallee = DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(DL, AS)); + } else { + ActualCallee = getValue(ISP.getCalledValue()); + } + + StatepointLoweringInfo SI(DAG); + populateCallLoweringInfo(SI.CLI, ISP.getCall(), + ImmutableStatepoint::CallArgsBeginPos, + ISP.getNumCallArgs(), ActualCallee, + ISP.getActualReturnType(), false /* IsPatchPoint */); + + for (const GCRelocateInst *Relocate : ISP.getRelocates()) { + SI.GCRelocates.push_back(Relocate); + SI.Bases.push_back(Relocate->getBasePtr()); + SI.Ptrs.push_back(Relocate->getDerivedPtr()); + } + + SI.GCArgs = ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end()); + SI.StatepointInstr = ISP.getInstruction(); + SI.GCTransitionArgs = + ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end()); + SI.ID = ISP.getID(); + SI.DeoptState = ArrayRef<const Use>(ISP.deopt_begin(), ISP.deopt_end()); + SI.StatepointFlags = ISP.getFlags(); + SI.NumPatchBytes = ISP.getNumPatchBytes(); + SI.EHPadBB = EHPadBB; + + SDValue ReturnValue = LowerAsSTATEPOINT(SI); + + // Export the result value if needed + const GCResultInst *GCResult = ISP.getGCResult(); + Type *RetTy = ISP.getActualReturnType(); + if (!RetTy->isVoidTy() && GCResult) { + if (GCResult->getParent() != ISP.getCall()->getParent()) { + // Result value will be used in a different basic block so we need to + // export it now. The default exporting mechanism will not work here + // because the statepoint call has a different type than the actual call. + // This means that by default llvm will create an export register of the + // wrong type (always i32 in our case), so instead we need to create an + // export register with the correct type manually. + // TODO: To eliminate this problem we can remove gc.result intrinsics + // completely and make the statepoint call return a tuple. + unsigned Reg = FuncInfo.CreateRegs(RetTy); + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), + DAG.getDataLayout(), Reg, RetTy, + ISP.getCall()->getCallingConv()); + SDValue Chain = DAG.getEntryNode(); + + RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr); + PendingExports.push_back(Chain); + FuncInfo.ValueMap[ISP.getInstruction()] = Reg; + } else { + // Result value will be used in the same basic block. Don't export it or + // perform any explicit register copies. + // We'll replace the actual call node shortly. gc_result will grab + // this value. + setValue(ISP.getInstruction(), ReturnValue); + } + } else { + // The token value is never used from here on; just generate a poison value + setValue(ISP.getInstruction(), DAG.getIntPtrConstant(-1, getCurSDLoc())); + } +} + +void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl( + const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB, + bool VarArgDisallowed, bool ForceVoidReturnTy) { + StatepointLoweringInfo SI(DAG); + unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin(); + populateCallLoweringInfo( + SI.CLI, Call, ArgBeginIndex, Call->getNumArgOperands(), Callee, + ForceVoidReturnTy ?
Type::getVoidTy(*DAG.getContext()) : Call->getType(), + false); + if (!VarArgDisallowed) + SI.CLI.IsVarArg = Call->getFunctionType()->isVarArg(); + + auto DeoptBundle = *Call->getOperandBundle(LLVMContext::OB_deopt); + + unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID; + + auto SD = parseStatepointDirectivesFromAttrs(Call->getAttributes()); + SI.ID = SD.StatepointID.getValueOr(DefaultID); + SI.NumPatchBytes = SD.NumPatchBytes.getValueOr(0); + + SI.DeoptState = + ArrayRef<const Use>(DeoptBundle.Inputs.begin(), DeoptBundle.Inputs.end()); + SI.StatepointFlags = static_cast<uint64_t>(StatepointFlags::None); + SI.EHPadBB = EHPadBB; + + // NB! The GC arguments are deliberately left empty. + + if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) { + ReturnVal = lowerRangeToAssertZExt(DAG, *Call, ReturnVal); + setValue(Call, ReturnVal); + } +} + +void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle( + const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB) { + LowerCallSiteWithDeoptBundleImpl(Call, Callee, EHPadBB, + /* VarArgDisallowed = */ false, + /* ForceVoidReturnTy = */ false); +} + +void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) { + // The result value of the gc_result is simply the result of the actual + // call. We've already emitted this, so just grab the value. + const Instruction *I = CI.getStatepoint(); + + if (I->getParent() != CI.getParent()) { + // The statepoint is in a different basic block, so we should have stored + // the call result in a virtual register. + // We cannot use the default getValue() functionality to copy the value + // from this register because the statepoint and actual call return types + // can be different, and getValue() will use CopyFromReg of the wrong type, + // which is always i32 in our case. + PointerType *CalleeType = cast<PointerType>( + ImmutableStatepoint(I).getCalledValue()->getType()); + Type *RetTy = + cast<FunctionType>(CalleeType->getElementType())->getReturnType(); + SDValue CopyFromReg = getCopyFromRegs(I, RetTy); + + assert(CopyFromReg.getNode()); + setValue(&CI, CopyFromReg); + } else { + setValue(&CI, getValue(I)); + } +} + +void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { +#ifndef NDEBUG + // Consistency check + // We skip this check for relocates not in the same basic block as their + // statepoint. It would be too expensive to preserve validation info through + // different basic blocks. + if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) + StatepointLowering.relocCallVisited(Relocate); + + auto *Ty = Relocate.getType()->getScalarType(); + if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty)) + assert(*IsManaged && "Non gc managed pointer relocated!"); +#endif + + const Value *DerivedPtr = Relocate.getDerivedPtr(); + SDValue SD = getValue(DerivedPtr); + + auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()]; + auto SlotIt = SpillMap.find(DerivedPtr); + assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value"); + Optional<int> DerivedPtrLocation = SlotIt->second; + + // We didn't need to spill these special cases (constants and allocas). + // See the handling in spillIncomingStatepointValue for details. + if (!DerivedPtrLocation) { + setValue(&Relocate, SD); + return; + } + + unsigned Index = *DerivedPtrLocation; + SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy()); + + // Note: We know all of these reloads are independent, but don't bother to + // exploit that chain-wise.
DAGCombine will happily do so as needed, so + // doing it here would be a small compile time win at most. + SDValue Chain = getRoot(); + + auto &MF = DAG.getMachineFunction(); + auto &MFI = MF.getFrameInfo(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); + auto *LoadMMO = + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, + MFI.getObjectSize(Index), + MFI.getObjectAlignment(Index)); + + auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + Relocate.getType()); + + SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain, + SpillSlot, LoadMMO); + + DAG.setRoot(SpillLoad.getValue(1)); + + assert(SpillLoad.getNode()); + setValue(&Relocate, SpillLoad); +} + +void SelectionDAGBuilder::LowerDeoptimizeCall(const CallInst *CI) { + const auto &TLI = DAG.getTargetLoweringInfo(); + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::DEOPTIMIZE), + TLI.getPointerTy(DAG.getDataLayout())); + + // We don't lower calls to __llvm_deoptimize as varargs, but as a regular + // call. We also do not lower the return value to any virtual register, and + // change the immediately following return to a trap instruction. + LowerCallSiteWithDeoptBundleImpl(CI, Callee, /* EHPadBB = */ nullptr, + /* VarArgDisallowed = */ true, + /* ForceVoidReturnTy = */ true); +} + +void SelectionDAGBuilder::LowerDeoptimizingReturn() { + // We do not lower the return value from llvm.deoptimize to any virtual + // register, and change the immediately following return to a trap + // instruction. + if (DAG.getTarget().Options.TrapUnreachable) + DAG.setRoot( + DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h new file mode 100644 index 0000000000000..70507932681d0 --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -0,0 +1,128 @@ +//===- StatepointLowering.h - SDAGBuilder's statepoint code ---*- C++ -*---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file includes support code used by SelectionDAGBuilder when lowering a +// statepoint sequence in SelectionDAG IR. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include <cassert> + +namespace llvm { + +class CallInst; +class SelectionDAGBuilder; + +/// This class tracks both per-statepoint and per-selectiondag information. +/// For each statepoint it tracks the locations of its gc values (incoming and +/// relocated) and the list of gc relocate calls scheduled for visiting (this +/// is used for a debug mode consistency check only). The spill slot tracking +/// works in concert with information in FunctionLoweringInfo. +class StatepointLoweringState { +public: + StatepointLoweringState() = default; + + /// Reset all state tracking for a newly encountered safepoint. Also + /// performs some consistency checking.
+ void startNewStatepoint(SelectionDAGBuilder &Builder); + + /// Clear the memory usage of this object. This is called from + /// SelectionDAGBuilder::clear. We require this is never called in the + /// midst of processing a statepoint sequence. + void clear(); + + /// Returns the spill location of a value incoming to the current + /// statepoint. Will return SDValue() if this value hasn't been + /// spilled. Otherwise, the value has already been spilled and no + /// further action is required by the caller. + SDValue getLocation(SDValue Val) { + auto I = Locations.find(Val); + if (I == Locations.end()) + return SDValue(); + return I->second; + } + + void setLocation(SDValue Val, SDValue Location) { + assert(!Locations.count(Val) && + "Trying to allocate already allocated location"); + Locations[Val] = Location; + } + + /// Record the fact that we expect to encounter a given gc_relocate + /// before the next statepoint. If we don't see it, we'll report + /// an assertion. + void scheduleRelocCall(const CallInst &RelocCall) { + // We are not interested in lowering dead instructions. + if (!RelocCall.use_empty()) + PendingGCRelocateCalls.push_back(&RelocCall); + } + + /// Remove this gc_relocate from the list we're expecting to see + /// before the next statepoint. If we weren't expecting to see + /// it, we'll report an assertion. + void relocCallVisited(const CallInst &RelocCall) { + // We are not interested in lowering dead instructions. + if (RelocCall.use_empty()) + return; + auto I = llvm::find(PendingGCRelocateCalls, &RelocCall); + assert(I != PendingGCRelocateCalls.end() && + "Visited unexpected gcrelocate call"); + PendingGCRelocateCalls.erase(I); + } + + // TODO: Should add consistency tracking to ensure we encounter + // expected gc_result calls too. + + /// Get a stack slot we can use to store a value of type ValueType. This + /// will hopefully be a recycled slot from another statepoint. + SDValue allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder); + + void reserveStackSlot(int Offset) { + assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() && + "out of bounds"); + assert(!AllocatedStackSlots.test(Offset) && "already reserved!"); + assert(NextSlotToAllocate <= (unsigned)Offset && "consistency!"); + AllocatedStackSlots.set(Offset); + } + + bool isStackSlotAllocated(int Offset) { + assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() && + "out of bounds"); + return AllocatedStackSlots.test(Offset); + } + +private: + /// Maps a pre-relocation value (a gc pointer directly incoming into the + /// statepoint) into its location (currently only stack slots) + DenseMap<SDValue, SDValue> Locations; + + /// A boolean indicator for each slot listed in the FunctionInfo as to + /// whether it has been used in the current statepoint. Since we try to + /// preserve stack slots across safepoints, there can be gaps in which + /// slots have been allocated.
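  /// For example, if slots 0 and 2 were reserved for values spilled at an
  /// earlier statepoint, this vector reads [1, 0, 1, ...] and the next
  /// allocateStackSlot call may hand out slot 1 (provided the object sizes
  /// match).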
+ SmallBitVector AllocatedStackSlots; + + /// Points just beyond the last slot known to have been allocated + unsigned NextSlotToAllocate = 0; + + /// Keep track of pending gcrelocate calls for consistency check + SmallVector<const CallInst *, 10> PendingGCRelocateCalls; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp new file mode 100644 index 0000000000000..9ab1324533f1e --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -0,0 +1,7368 @@ +//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This implements the TargetLowering class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include <cctype> +using namespace llvm; + +/// NOTE: The TargetMachine owns TLOF. +TargetLowering::TargetLowering(const TargetMachine &tm) + : TargetLoweringBase(tm) {} + +const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { + return nullptr; +} + +bool TargetLowering::isPositionIndependent() const { + return getTargetMachine().isPositionIndependent(); +} + +/// Check whether a given call node is in tail position within its function. If +/// so, it sets Chain to the input chain of the tail call. +bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, + SDValue &Chain) const { + const Function &F = DAG.getMachineFunction().getFunction(); + + // Conservatively require the attributes of the call to match those of + // the return. Ignore NoAlias and NonNull because they don't affect the + // call sequence. + AttributeList CallerAttrs = F.getAttributes(); + if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex) + .removeAttribute(Attribute::NoAlias) + .removeAttribute(Attribute::NonNull) + .hasAttributes()) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) + return false; + + // Check if the only use is a function return node. 
+ return isUsedByReturnOnly(Node, Chain); +} + +bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, + const uint32_t *CallerPreservedMask, + const SmallVectorImpl<CCValAssign> &ArgLocs, + const SmallVectorImpl<SDValue> &OutVals) const { + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + const CCValAssign &ArgLoc = ArgLocs[I]; + if (!ArgLoc.isRegLoc()) + continue; + Register Reg = ArgLoc.getLocReg(); + // Only look at callee saved registers. + if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg)) + continue; + // Check that we pass the value used for the caller. + // (We look for a CopyFromReg reading a virtual register that is used + // for the function live-in value of register Reg) + SDValue Value = OutVals[I]; + if (Value->getOpcode() != ISD::CopyFromReg) + return false; + unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg(); + if (MRI.getLiveInPhysReg(ArgReg) != Reg) + return false; + } + return true; +} + +/// Set CallLoweringInfo attribute flags based on a call instruction +/// and called function attributes. +void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, + unsigned ArgIdx) { + IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt); + IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt); + IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg); + IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet); + IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest); + IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal); + IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca); + IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned); + IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf); + IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError); + Alignment = Call->getParamAlignment(ArgIdx); + ByValType = nullptr; + if (Call->paramHasAttr(ArgIdx, Attribute::ByVal)) + ByValType = Call->getParamByValType(ArgIdx); +} + +/// Generate a libcall taking the given operands as arguments and returning a +/// result of type RetVT. 
+std::pair<SDValue, SDValue> +TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, + ArrayRef<SDValue> Ops, + MakeLibCallOptions CallOptions, + const SDLoc &dl) const { + TargetLowering::ArgListTy Args; + Args.reserve(Ops.size()); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i < Ops.size(); ++i) { + SDValue NewOp = Ops[i]; + Entry.Node = NewOp; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(), + CallOptions.IsSExt); + Entry.IsZExt = !Entry.IsSExt; + + if (CallOptions.IsSoften && + !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) { + Entry.IsSExt = Entry.IsZExt = false; + } + Args.push_back(Entry); + } + + if (LC == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported library call operation!"); + SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), + getPointerTy(DAG.getDataLayout())); + + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + TargetLowering::CallLoweringInfo CLI(DAG); + bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt); + bool zeroExtend = !signExtend; + + if (CallOptions.IsSoften && + !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) { + signExtend = zeroExtend = false; + } + + CLI.setDebugLoc(dl) + .setChain(DAG.getEntryNode()) + .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) + .setNoReturn(CallOptions.DoesNotReturn) + .setDiscardResult(!CallOptions.IsReturnValueUsed) + .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization) + .setSExtResult(signExtend) + .setZExtResult(zeroExtend); + return LowerCallTo(CLI); +} + +bool +TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps, + unsigned Limit, uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, + bool ZeroMemset, + bool MemcpyStrSrc, + bool AllowOverlap, + unsigned DstAS, unsigned SrcAS, + const AttributeList &FuncAttributes) const { + // If 'SrcAlign' is zero, that means the memory operation does not need to + // load the value, i.e. memset or memcpy from constant string. Otherwise, + // it's the inferred alignment of the source. 'DstAlign', on the other hand, + // is the specified alignment of the memory operation. If it is zero, that + // means it's possible to change the alignment of the destination. + // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does + // not need to be loaded. + if (!(SrcAlign == 0 || SrcAlign >= DstAlign)) + return false; + + EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign, + IsMemset, ZeroMemset, MemcpyStrSrc, + FuncAttributes); + + if (VT == MVT::Other) { + // Use the largest integer type whose alignment constraints are satisfied. + // We only need to check DstAlign here as SrcAlign is always greater or + // equal to DstAlign (or zero). + VT = MVT::i64; + while (DstAlign && DstAlign < VT.getSizeInBits() / 8 && + !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) + VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); + assert(VT.isInteger()); + + // Find the largest legal integer type. + MVT LVT = MVT::i64; + while (!isTypeLegal(LVT)) + LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); + assert(LVT.isInteger()); + + // If the type we've chosen is larger than the largest legal integer type + // then use that instead. 
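+    // For example, with DstAlign == 4 on a target that rejects misaligned
+    // i64 accesses, the alignment loop above settles on i32; likewise, on a
+    // 32-bit target where i64 is not a legal type, LVT is i32 and the clamp
+    // below caps VT at i32.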
+    if (VT.bitsGT(LVT))
+      VT = LVT;
+  }
+
+  unsigned NumMemOps = 0;
+  while (Size != 0) {
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    while (VTSize > Size) {
+      // For now, only use non-vector loads / stores for the left-over pieces.
+      EVT NewVT = VT;
+      unsigned NewVTSize;
+
+      bool Found = false;
+      if (VT.isVector() || VT.isFloatingPoint()) {
+        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
+        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
+            isSafeMemOpType(NewVT.getSimpleVT()))
+          Found = true;
+        else if (NewVT == MVT::i64 &&
+                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
+                 isSafeMemOpType(MVT::f64)) {
+          // i64 is usually not legal on 32-bit targets, but f64 may be.
+          NewVT = MVT::f64;
+          Found = true;
+        }
+      }
+
+      if (!Found) {
+        do {
+          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+          if (NewVT == MVT::i8)
+            break;
+        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
+      }
+      NewVTSize = NewVT.getSizeInBits() / 8;
+
+      // If the new VT cannot cover all of the remaining bits, then consider
+      // issuing one (or a pair of) unaligned and overlapping loads / stores.
+      bool Fast;
+      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
+          allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
+                                         MachineMemOperand::MONone, &Fast) &&
+          Fast)
+        VTSize = Size;
+      else {
+        VT = NewVT;
+        VTSize = NewVTSize;
+      }
+    }
+
+    if (++NumMemOps > Limit)
+      return false;
+
+    MemOps.push_back(VT);
+    Size -= VTSize;
+  }
+
+  return true;
+}
+
+/// Soften the operands of a comparison. This code is shared among BR_CC,
+/// SELECT_CC, and SETCC handlers.
+void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
+                                         SDValue &NewLHS, SDValue &NewRHS,
+                                         ISD::CondCode &CCCode,
+                                         const SDLoc &dl, const SDValue OldLHS,
+                                         const SDValue OldRHS) const {
+  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
+          VT == MVT::ppcf128) &&
+         "Unsupported setcc type!");
+
+  // Expand into one or more soft-fp libcall(s).
+  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+  bool ShouldInvertCC = false;
+  switch (CCCode) {
+  case ISD::SETEQ:
+  case ISD::SETOEQ:
+    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
+          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
+    break;
+  case ISD::SETNE:
+  case ISD::SETUNE:
+    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
+          (VT == MVT::f64) ? RTLIB::UNE_F64 :
+          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
+    break;
+  case ISD::SETGE:
+  case ISD::SETOGE:
+    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+          (VT == MVT::f64) ? RTLIB::OGE_F64 :
+          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
+    break;
+  case ISD::SETLT:
+  case ISD::SETOLT:
+    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+          (VT == MVT::f64) ? RTLIB::OLT_F64 :
+          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
+    break;
+  case ISD::SETLE:
+  case ISD::SETOLE:
+    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+          (VT == MVT::f64) ? RTLIB::OLE_F64 :
+          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
+    break;
+  case ISD::SETGT:
+  case ISD::SETOGT:
+    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+          (VT == MVT::f64) ? RTLIB::OGT_F64 :
+          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
+    break;
+  case ISD::SETUO:
+    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+          (VT == MVT::f64) ? RTLIB::UO_F64 :
+          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
+    break;
+  case ISD::SETO:
+    LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
+          (VT == MVT::f64) ? RTLIB::O_F64 :
+          (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
+    break;
+  case ISD::SETONE:
+    // SETONE = SETOLT | SETOGT
+    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+          (VT == MVT::f64) ? RTLIB::OLT_F64 :
+          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
+    LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+          (VT == MVT::f64) ? RTLIB::OGT_F64 :
+          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
+    break;
+  case ISD::SETUEQ:
+    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+          (VT == MVT::f64) ? RTLIB::UO_F64 :
+          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
+    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
+          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
+    break;
+  default:
+    // Invert CC for unordered comparisons
+    ShouldInvertCC = true;
+    switch (CCCode) {
+    case ISD::SETULT:
+      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+            (VT == MVT::f64) ? RTLIB::OGE_F64 :
+            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
+      break;
+    case ISD::SETULE:
+      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+            (VT == MVT::f64) ? RTLIB::OGT_F64 :
+            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
+      break;
+    case ISD::SETUGT:
+      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+            (VT == MVT::f64) ? RTLIB::OLE_F64 :
+            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
+      break;
+    case ISD::SETUGE:
+      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+            (VT == MVT::f64) ? RTLIB::OLT_F64 :
+            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
+      break;
+    default: llvm_unreachable("Do not know how to soften this setcc!");
+    }
+  }
+
+  // Use the target-specific return value for comparison lib calls.
+  EVT RetVT = getCmpLibcallReturnType();
+  SDValue Ops[2] = {NewLHS, NewRHS};
+  TargetLowering::MakeLibCallOptions CallOptions;
+  EVT OpsVT[2] = { OldLHS.getValueType(),
+                   OldRHS.getValueType() };
+  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
+  NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first;
+  NewRHS = DAG.getConstant(0, dl, RetVT);
+
+  CCCode = getCmpLibcallCC(LC1);
+  if (ShouldInvertCC)
+    CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);
+
+  if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+    SDValue Tmp = DAG.getNode(
+        ISD::SETCC, dl,
+        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
+        NewLHS, NewRHS, DAG.getCondCode(CCCode));
+    NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first;
+    NewLHS = DAG.getNode(
+        ISD::SETCC, dl,
+        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
+        NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
+    NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+    NewRHS = SDValue();
+  }
+}
+
+/// Return the entry encoding for a jump table in the current function. The
+/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
+unsigned TargetLowering::getJumpTableEncoding() const {
+  // In non-pic modes, just use the address of a block.
+  if (!isPositionIndependent())
+    return MachineJumpTableInfo::EK_BlockAddress;
+
+  // In PIC mode, if the target supports a GPRel32 directive, use it.
+  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
+    return MachineJumpTableInfo::EK_GPRel32BlockAddress;
+
+  // Otherwise, use a label difference.
+  return MachineJumpTableInfo::EK_LabelDifference32;
+}
+
+SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+                                                 SelectionDAG &DAG) const {
+  // If our PIC model is GP relative, use the global offset table as the base.
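+  // (GP-relative entries are encoded as offsets from the global offset
+  // table, so the GOT address itself must serve as the base; the remaining
+  // encodings are based on the jump table itself and can use Table as-is.)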
+  unsigned JTEncoding = getJumpTableEncoding();
+
+  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
+      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
+    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
+
+  return Table;
+}
+
+/// This returns the relocation base for the given PIC jumptable, the same as
+/// getPICJumpTableRelocBase, but as an MCExpr.
+const MCExpr *
+TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+                                             unsigned JTI, MCContext &Ctx) const {
+  // The normal PIC reloc base is the label at the start of the jump table.
+  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
+}
+
+bool
+TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+  const TargetMachine &TM = getTargetMachine();
+  const GlobalValue *GV = GA->getGlobal();
+
+  // If the address is not even local to this DSO we will have to load it from
+  // the GOT and then add the offset.
+  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+    return false;
+
+  // If the code is position independent we will have to add a base register.
+  if (isPositionIndependent())
+    return false;
+
+  // Otherwise we can do it.
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+//  Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// If the specified instruction has a constant integer operand and there are
+/// bits set in that constant that are not demanded, then clear those bits and
+/// return true.
+bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+                                            TargetLoweringOpt &TLO) const {
+  SDLoc DL(Op);
+  unsigned Opcode = Op.getOpcode();
+
+  // Do target-specific constant optimization.
+  if (targetShrinkDemandedConstant(Op, Demanded, TLO))
+    return TLO.New.getNode();
+
+  // FIXME: ISD::SELECT, ISD::SELECT_CC
+  switch (Opcode) {
+  default:
+    break;
+  case ISD::XOR:
+  case ISD::AND:
+  case ISD::OR: {
+    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    if (!Op1C)
+      return false;
+
+    // If this is a 'not' op, don't touch it because that's a canonical form.
+    const APInt &C = Op1C->getAPIntValue();
+    if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
+      return false;
+
+    if (!C.isSubsetOf(Demanded)) {
+      EVT VT = Op.getValueType();
+      SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
+      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
+      return TLO.CombineTo(Op, NewOp);
+    }
+
+    break;
+  }
+  }
+
+  return false;
+}
+
+/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
+/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
+/// generalized for targets with other types of implicit widening casts.
+bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
+                                      const APInt &Demanded,
+                                      TargetLoweringOpt &TLO) const {
+  assert(Op.getNumOperands() == 2 &&
+         "ShrinkDemandedOp only supports binary operators!");
+  assert(Op.getNode()->getNumValues() == 1 &&
+         "ShrinkDemandedOp only supports nodes with one result!");
+
+  SelectionDAG &DAG = TLO.DAG;
+  SDLoc dl(Op);
+
+  // Early return, as this function cannot handle vector types.
+  if (Op.getValueType().isVector())
+    return false;
+
+  // Don't do this if the node has another user, which may require the
+  // full value.
+  if (!Op.getNode()->hasOneUse())
+    return false;
+
+  // Search for the smallest integer type with free casts to and from
+  // Op's type. For expedience, just check power-of-2 integer types.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  unsigned DemandedSize = Demanded.getActiveBits();
+  unsigned SmallVTBits = DemandedSize;
+  if (!isPowerOf2_32(SmallVTBits))
+    SmallVTBits = NextPowerOf2(SmallVTBits);
+  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
+    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
+        TLI.isZExtFree(SmallVT, Op.getValueType())) {
+      // We found a type with free casts.
+      SDValue X = DAG.getNode(
+          Op.getOpcode(), dl, SmallVT,
+          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
+          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
+      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
+      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
+      return TLO.CombineTo(Op, Z);
+    }
+  }
+  return false;
+}
+
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+                                          DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+                        !DCI.isBeforeLegalizeOps());
+  KnownBits Known;
+
+  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
+  if (Simplified) {
+    DCI.AddToWorklist(Op.getNode());
+    DCI.CommitTargetLoweringOpt(TLO);
+  }
+  return Simplified;
+}
+
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+                                          KnownBits &Known,
+                                          TargetLoweringOpt &TLO,
+                                          unsigned Depth,
+                                          bool AssumeSingleUse) const {
+  EVT VT = Op.getValueType();
+  APInt DemandedElts = VT.isVector()
+                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
+                           : APInt(1, 1);
+  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
+                              AssumeSingleUse);
+}
+
+// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
+// TODO: Under what circumstances can we create nodes? Constant folding?
+SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
+    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+    SelectionDAG &DAG, unsigned Depth) const {
+  // Limit search depth.
+  if (Depth >= SelectionDAG::MaxRecursionDepth)
+    return SDValue();
+
+  // Ignore UNDEFs.
+  if (Op.isUndef())
+    return SDValue();
+
+  // Not demanding any bits/elts from Op.
+  if (DemandedBits == 0 || DemandedElts == 0)
+    return DAG.getUNDEF(Op.getValueType());
+
+  unsigned NumElts = DemandedElts.getBitWidth();
+  KnownBits LHSKnown, RHSKnown;
+  switch (Op.getOpcode()) {
+  case ISD::BITCAST: {
+    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
+    EVT SrcVT = Src.getValueType();
+    EVT DstVT = Op.getValueType();
+    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
+    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
+
+    if (NumSrcEltBits == NumDstEltBits)
+      if (SDValue V = SimplifyMultipleUseDemandedBits(
+              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
+        return DAG.getBitcast(DstVT, V);
+
+    // TODO - bigendian once we have test coverage.
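+    // Worked example: for a little-endian v4i8 -> v2i16 bitcast where only
+    // the low byte of result element 1 is demanded, Scale is 2 and the loop
+    // below demands exactly source element 2 (the low half of that i16).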
+ if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 && + DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = NumDstEltBits / NumSrcEltBits; + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != Scale; ++i) { + unsigned Offset = i * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + if (!Sub.isNullValue()) { + DemandedSrcBits |= Sub; + for (unsigned j = 0; j != NumElts; ++j) + if (DemandedElts[j]) + DemandedSrcElts.setBit((j * Scale) + i); + } + } + + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + } + + // TODO - bigendian once we have test coverage. + if ((NumSrcEltBits % NumDstEltBits) == 0 && + DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = NumSrcEltBits / NumDstEltBits; + unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) { + unsigned Offset = (i % Scale) * NumDstEltBits; + DemandedSrcBits.insertBits(DemandedBits, Offset); + DemandedSrcElts.setBit(i / Scale); + } + + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + } + + break; + } + case ISD::AND: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known 1 on one side, return the other. + // These bits cannot contribute to the result of the 'and' in this + // context. + if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One)) + return Op.getOperand(1); + break; + } + case ISD::OR: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known zero on one side, return the + // other. These bits cannot contribute to the result of the 'or' in this + // context. + if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero)) + return Op.getOperand(1); + break; + } + case ISD::XOR: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known zero on one side, return the + // other. + if (DemandedBits.isSubsetOf(RHSKnown.Zero)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(LHSKnown.Zero)) + return Op.getOperand(1); + break; + } + case ISD::SIGN_EXTEND_INREG: { + // If none of the extended bits are demanded, eliminate the sextinreg. + EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits()) + return Op.getOperand(0); + break; + } + case ISD::INSERT_VECTOR_ELT: { + // If we don't demand the inserted element, return the base vector. 
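+    // For example, (insert_vector_elt V, X, 2) folds to V whenever element 2
+    // is not in DemandedElts; the inserted scalar never reaches a demanded
+    // lane.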
+    SDValue Vec = Op.getOperand(0);
+    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+    EVT VecVT = Vec.getValueType();
+    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
+        !DemandedElts[CIdx->getZExtValue()])
+      return Vec;
+    break;
+  }
+  case ISD::VECTOR_SHUFFLE: {
+    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+    // If all the demanded elts are from one operand and are inline,
+    // then we can use the operand directly.
+    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
+    for (unsigned i = 0; i != NumElts; ++i) {
+      int M = ShuffleMask[i];
+      if (M < 0 || !DemandedElts[i])
+        continue;
+      AllUndef = false;
+      IdentityLHS &= (M == (int)i);
+      IdentityRHS &= ((M - NumElts) == i);
+    }
+
+    if (AllUndef)
+      return DAG.getUNDEF(Op.getValueType());
+    if (IdentityLHS)
+      return Op.getOperand(0);
+    if (IdentityRHS)
+      return Op.getOperand(1);
+    break;
+  }
+  default:
+    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
+      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
+              Op, DemandedBits, DemandedElts, DAG, Depth))
+        return V;
+    break;
+  }
+  return SDValue();
+}
+
+/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
+/// result of Op are ever used downstream. If we can use this information to
+/// simplify Op, create a new simplified DAG node and return true, returning the
+/// original and new nodes in Old and New. Otherwise, analyze the expression and
+/// return a mask of Known bits for the expression (used to simplify the
+/// caller). The Known bits may only be accurate for those bits in the
+/// OriginalDemandedBits and OriginalDemandedElts.
+bool TargetLowering::SimplifyDemandedBits(
+    SDValue Op, const APInt &OriginalDemandedBits,
+    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
+    unsigned Depth, bool AssumeSingleUse) const {
+  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
+  assert(Op.getScalarValueSizeInBits() == BitWidth &&
+         "Mask size mismatches value type size!");
+
+  unsigned NumElts = OriginalDemandedElts.getBitWidth();
+  assert((!Op.getValueType().isVector() ||
+          NumElts == Op.getValueType().getVectorNumElements()) &&
+         "Unexpected vector size");
+
+  APInt DemandedBits = OriginalDemandedBits;
+  APInt DemandedElts = OriginalDemandedElts;
+  SDLoc dl(Op);
+  auto &DL = TLO.DAG.getDataLayout();
+
+  // Don't know anything.
+  Known = KnownBits(BitWidth);
+
+  // Undef operand.
+  if (Op.isUndef())
+    return false;
+
+  if (Op.getOpcode() == ISD::Constant) {
+    // We know all of the bits for a constant!
+    Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
+    Known.Zero = ~Known.One;
+    return false;
+  }
+
+  // Other users may use these bits.
+  EVT VT = Op.getValueType();
+  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
+    if (Depth != 0) {
+      // If not at the root, just compute the Known bits to
+      // simplify things downstream.
+      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+      return false;
+    }
+    // If this is the root being simplified, allow it to have multiple uses,
+    // just set the DemandedBits/Elts to all bits.
+    DemandedBits = APInt::getAllOnesValue(BitWidth);
+    DemandedElts = APInt::getAllOnesValue(NumElts);
+  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
+    // Not demanding any bits/elts from Op.
+    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
+    // Limit search depth.
+ return false; + } + + KnownBits Known2, KnownOut; + switch (Op.getOpcode()) { + case ISD::TargetConstant: + llvm_unreachable("Can't simplify this node"); + case ISD::SCALAR_TO_VECTOR: { + if (!DemandedElts[0]) + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); + + KnownBits SrcKnown; + SDValue Src = Op.getOperand(0); + unsigned SrcBitWidth = Src.getScalarValueSizeInBits(); + APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth); + if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1)) + return true; + Known = SrcKnown.zextOrTrunc(BitWidth, false); + break; + } + case ISD::BUILD_VECTOR: + // Collect the known bits that are shared by every demanded element. + // TODO: Call SimplifyDemandedBits for non-constant demanded elements. + Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); + return false; // Don't fall through, will infinitely loop. + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); + if (getTargetConstantFromLoad(LD)) { + Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); + return false; // Don't fall through, will infinitely loop. + } + break; + } + case ISD::INSERT_VECTOR_ELT: { + SDValue Vec = Op.getOperand(0); + SDValue Scl = Op.getOperand(1); + auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + EVT VecVT = Vec.getValueType(); + + // If index isn't constant, assume we need all vector elements AND the + // inserted element. + APInt DemandedVecElts(DemandedElts); + if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) { + unsigned Idx = CIdx->getZExtValue(); + DemandedVecElts.clearBit(Idx); + + // Inserted element is not required. + if (!DemandedElts[Idx]) + return TLO.CombineTo(Op, Vec); + } + + KnownBits KnownScl; + unsigned NumSclBits = Scl.getScalarValueSizeInBits(); + APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits); + if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1)) + return true; + + Known = KnownScl.zextOrTrunc(BitWidth, false); + + KnownBits KnownVec; + if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO, + Depth + 1)) + return true; + + if (!!DemandedVecElts) { + Known.One &= KnownVec.One; + Known.Zero &= KnownVec.Zero; + } + + return false; + } + case ISD::INSERT_SUBVECTOR: { + SDValue Base = Op.getOperand(0); + SDValue Sub = Op.getOperand(1); + EVT SubVT = Sub.getValueType(); + unsigned NumSubElts = SubVT.getVectorNumElements(); + + // If index isn't constant, assume we need the original demanded base + // elements and ALL the inserted subvector elements. + APInt BaseElts = DemandedElts; + APInt SubElts = APInt::getAllOnesValue(NumSubElts); + if (isa<ConstantSDNode>(Op.getOperand(2))) { + const APInt &Idx = Op.getConstantOperandAPInt(2); + if (Idx.ule(NumElts - NumSubElts)) { + unsigned SubIdx = Idx.getZExtValue(); + SubElts = DemandedElts.extractBits(NumSubElts, SubIdx); + BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); + } + } + + KnownBits KnownSub, KnownBase; + if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO, + Depth + 1)) + return true; + if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO, + Depth + 1)) + return true; + + Known.Zero.setAllBits(); + Known.One.setAllBits(); + if (!!SubElts) { + Known.One &= KnownSub.One; + Known.Zero &= KnownSub.Zero; + } + if (!!BaseElts) { + Known.One &= KnownBase.One; + Known.Zero &= KnownBase.Zero; + } + break; + } + case ISD::EXTRACT_SUBVECTOR: { + // If index isn't constant, assume we need all the source vector elements. 
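+    // For example, extracting v4i32 from v8i32 at constant index 4 with all
+    // four result elements demanded shifts the demand to source elements
+    // 4-7; a variable index conservatively leaves SrcElts all-ones.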
+    SDValue Src = Op.getOperand(0);
+    ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+    APInt SrcElts = APInt::getAllOnesValue(NumSrcElts);
+    if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+      // Offset the demanded elts by the subvector index.
+      uint64_t Idx = SubIdx->getZExtValue();
+      SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+    }
+    if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1))
+      return true;
+    break;
+  }
+  case ISD::CONCAT_VECTORS: {
+    Known.Zero.setAllBits();
+    Known.One.setAllBits();
+    EVT SubVT = Op.getOperand(0).getValueType();
+    unsigned NumSubVecs = Op.getNumOperands();
+    unsigned NumSubElts = SubVT.getVectorNumElements();
+    for (unsigned i = 0; i != NumSubVecs; ++i) {
+      APInt DemandedSubElts =
+          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
+      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
+                               Known2, TLO, Depth + 1))
+        return true;
+      // Known bits are shared by every demanded subvector element.
+      if (!!DemandedSubElts) {
+        Known.One &= Known2.One;
+        Known.Zero &= Known2.Zero;
+      }
+    }
+    break;
+  }
+  case ISD::VECTOR_SHUFFLE: {
+    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+    // Collect demanded elements from shuffle operands.
+    APInt DemandedLHS(NumElts, 0);
+    APInt DemandedRHS(NumElts, 0);
+    for (unsigned i = 0; i != NumElts; ++i) {
+      if (!DemandedElts[i])
+        continue;
+      int M = ShuffleMask[i];
+      if (M < 0) {
+        // For UNDEF elements, we don't know anything about the common state of
+        // the shuffle result.
+        DemandedLHS.clearAllBits();
+        DemandedRHS.clearAllBits();
+        break;
+      }
+      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
+      if (M < (int)NumElts)
+        DemandedLHS.setBit(M);
+      else
+        DemandedRHS.setBit(M - NumElts);
+    }
+
+    if (!!DemandedLHS || !!DemandedRHS) {
+      SDValue Op0 = Op.getOperand(0);
+      SDValue Op1 = Op.getOperand(1);
+
+      Known.Zero.setAllBits();
+      Known.One.setAllBits();
+      if (!!DemandedLHS) {
+        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
+                                 Depth + 1))
+          return true;
+        Known.One &= Known2.One;
+        Known.Zero &= Known2.Zero;
+      }
+      if (!!DemandedRHS) {
+        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
+                                 Depth + 1))
+          return true;
+        Known.One &= Known2.One;
+        Known.Zero &= Known2.Zero;
+      }
+
+      // Attempt to avoid multi-use ops if we don't need anything from them.
+      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
+      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
+      if (DemandedOp0 || DemandedOp1) {
+        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
+        return TLO.CombineTo(Op, NewOp);
+      }
+    }
+    break;
+  }
+  case ISD::AND: {
+    SDValue Op0 = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(1);
+
+    // If the RHS is a constant, check to see if the LHS would be zero without
+    // using the bits from the RHS. Below, we use knowledge about the RHS to
+    // simplify the LHS; here we're using information from the LHS to simplify
+    // the RHS.
+    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
+      // Do not increment Depth here; that can cause an infinite loop.
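+      // For example, if Op0 is (zext i8 X to i32), its top 24 bits are known
+      // zero, so (and Op0, 0xFF) is recognized as dead below and folds to Op0.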
+      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
+      // If the LHS already has zeros where RHSC does, this 'and' is dead.
+      if ((LHSKnown.Zero & DemandedBits) ==
+          (~RHSC->getAPIntValue() & DemandedBits))
+        return TLO.CombineTo(Op, Op0);
+
+      // If any of the set bits in the RHS are known zero on the LHS, shrink
+      // the constant.
+      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
+        return true;
+
+      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
+      // constant, but if this 'and' is only clearing bits that were just set by
+      // the xor, then this 'and' can be eliminated by shrinking the mask of
+      // the xor. For example, for a 32-bit X:
+      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
+      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
+          LHSKnown.One == ~RHSC->getAPIntValue()) {
+        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
+        return TLO.CombineTo(Op, Xor);
+      }
+    }
+
+    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+                             Depth + 1))
+      return true;
+    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
+                             Known2, TLO, Depth + 1))
+      return true;
+    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+    // Attempt to avoid multi-use ops if we don't need anything from them.
+    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+      if (DemandedOp0 || DemandedOp1) {
+        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+        return TLO.CombineTo(Op, NewOp);
+      }
+    }
+
+    // If all of the demanded bits are known one on one side, return the other.
+    // These bits cannot contribute to the result of the 'and'.
+    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
+      return TLO.CombineTo(Op, Op0);
+    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
+      return TLO.CombineTo(Op, Op1);
+    // If all of the demanded bits in the inputs are known zeros, return zero.
+    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
+      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
+    // If the RHS is a constant, see if we can simplify it.
+    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
+      return true;
+    // If the operation can be done in a smaller type, do so.
+    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
+      return true;
+
+    // Output known-1 bits are only known if set in both the LHS & RHS.
+    Known.One &= Known2.One;
+    // Output known-0 bits are known to be clear if zero in either the LHS | RHS.
+    Known.Zero |= Known2.Zero;
+    break;
+  }
+  case ISD::OR: {
+    SDValue Op0 = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(1);
+
+    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+                             Depth + 1))
+      return true;
+    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
+                             Known2, TLO, Depth + 1))
+      return true;
+    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+    // Attempt to avoid multi-use ops if we don't need anything from them.
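+    // For example, if Op0 is (and X, 0xFF) with other users and only the low
+    // byte is demanded here, SimplifyMultipleUseDemandedBits can return X
+    // directly and a new OR is built that bypasses the mask, leaving the
+    // other users of the 'and' untouched.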
+    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+      if (DemandedOp0 || DemandedOp1) {
+        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+        return TLO.CombineTo(Op, NewOp);
+      }
+    }
+
+    // If all of the demanded bits are known zero on one side, return the other.
+    // These bits cannot contribute to the result of the 'or'.
+    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
+      return TLO.CombineTo(Op, Op0);
+    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
+      return TLO.CombineTo(Op, Op1);
+    // If the RHS is a constant, see if we can simplify it.
+    if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
+      return true;
+    // If the operation can be done in a smaller type, do so.
+    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
+      return true;
+
+    // Output known-0 bits are only known if clear in both the LHS & RHS.
+    Known.Zero &= Known2.Zero;
+    // Output known-1 bits are known to be set if set in either the LHS | RHS.
+    Known.One |= Known2.One;
+    break;
+  }
+  case ISD::XOR: {
+    SDValue Op0 = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(1);
+
+    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+                             Depth + 1))
+      return true;
+    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
+                             Depth + 1))
+      return true;
+    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+    // Attempt to avoid multi-use ops if we don't need anything from them.
+    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+      if (DemandedOp0 || DemandedOp1) {
+        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+        return TLO.CombineTo(Op, NewOp);
+      }
+    }
+
+    // If all of the demanded bits are known zero on one side, return the other.
+    // These bits cannot contribute to the result of the 'xor'.
+    if (DemandedBits.isSubsetOf(Known.Zero))
+      return TLO.CombineTo(Op, Op0);
+    if (DemandedBits.isSubsetOf(Known2.Zero))
+      return TLO.CombineTo(Op, Op1);
+    // If the operation can be done in a smaller type, do so.
+    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
+      return true;
+
+    // If all of the unknown bits are known to be zero on one side or the other
+    // (but not both) turn this into an *inclusive* or.
+    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
+
+    // Output known-0 bits are known if clear or set in both the LHS & RHS.
+    KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
+    // Output known-1 bits are known to be set if set in only one of the LHS, RHS.
+ KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); + + if (ConstantSDNode *C = isConstOrConstSplat(Op1)) { + // If one side is a constant, and all of the known set bits on the other + // side are also set in the constant, turn this into an AND, as we know + // the bits will be cleared. + // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 + // NB: it is okay if more bits are known than are requested + if (C->getAPIntValue() == Known2.One) { + SDValue ANDC = + TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC)); + } + + // If the RHS is a constant, see if we can change it. Don't alter a -1 + // constant because that's a 'not' op, and that is better for combining + // and codegen. + if (!C->isAllOnesValue()) { + if (DemandedBits.isSubsetOf(C->getAPIntValue())) { + // We're flipping all demanded bits. Flip the undemanded bits too. + SDValue New = TLO.DAG.getNOT(dl, Op0, VT); + return TLO.CombineTo(Op, New); + } + // If we can't turn this into a 'not', try to shrink the constant. + if (ShrinkDemandedConstant(Op, DemandedBits, TLO)) + return true; + } + } + + Known = std::move(KnownOut); + break; + } + case ISD::SELECT: + if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO, + Depth + 1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO, + Depth + 1)) + return true; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (ShrinkDemandedConstant(Op, DemandedBits, TLO)) + return true; + + // Only known if known in both the LHS and RHS. + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + break; + case ISD::SELECT_CC: + if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO, + Depth + 1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO, + Depth + 1)) + return true; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (ShrinkDemandedConstant(Op, DemandedBits, TLO)) + return true; + + // Only known if known in both the LHS and RHS. + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + break; + case ISD::SETCC: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + // If (1) we only need the sign-bit, (2) the setcc operands are the same + // width as the setcc result, and (3) the result of a setcc conforms to 0 or + // -1, we may be able to bypass the setcc. + if (DemandedBits.isSignMask() && + Op0.getScalarValueSizeInBits() == BitWidth && + getBooleanContents(VT) == + BooleanContent::ZeroOrNegativeOneBooleanContent) { + // If we're testing X < 0, then this compare isn't needed - just use X! + // FIXME: We're limiting to integer types here, but this should also work + // if we don't care about FP signed-zero. The use of SETLT with FP means + // that we don't care about NaNs. + if (CC == ISD::SETLT && Op1.getValueType().isInteger() && + (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode()))) + return TLO.CombineTo(Op, Op0); + + // TODO: Should we check for other forms of sign-bit comparisons? 
+ // Examples: X <= -1, X >= 0 + } + if (getBooleanContents(Op0.getValueType()) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + Known.Zero.setBitsFrom(1); + break; + } + case ISD::SHL: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) { + // If the shift count is an invalid immediate, don't do anything. + if (SA->getAPIntValue().uge(BitWidth)) + break; + + unsigned ShAmt = SA->getZExtValue(); + if (ShAmt == 0) + return TLO.CombineTo(Op, Op0); + + // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a + // single shift. We can do this if the bottom bits (which are shifted + // out) are never demanded. + // TODO - support non-uniform vector amounts. + if (Op0.getOpcode() == ISD::SRL) { + if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) { + if (ConstantSDNode *SA2 = + isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { + if (SA2->getAPIntValue().ult(BitWidth)) { + unsigned C1 = SA2->getZExtValue(); + unsigned Opc = ISD::SHL; + int Diff = ShAmt - C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SRL; + } + + SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType()); + return TLO.CombineTo( + Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA)); + } + } + } + } + + if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts, + Known, TLO, Depth + 1)) + return true; + + // Try shrinking the operation as long as the shift amount will still be + // in range. + if ((ShAmt < DemandedBits.getActiveBits()) && + ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) + return true; + + // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits + // are not demanded. This will likely allow the anyext to be folded away. + if (Op0.getOpcode() == ISD::ANY_EXTEND) { + SDValue InnerOp = Op0.getOperand(0); + EVT InnerVT = InnerOp.getValueType(); + unsigned InnerBits = InnerVT.getScalarSizeInBits(); + if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits && + isTypeDesirableForOp(ISD::SHL, InnerVT)) { + EVT ShTy = getShiftAmountTy(InnerVT, DL); + if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) + ShTy = InnerVT; + SDValue NarrowShl = + TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp, + TLO.DAG.getConstant(ShAmt, dl, ShTy)); + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl)); + } + // Repeat the SHL optimization above in cases where an extension + // intervenes: (shl (anyext (shr x, c1)), c2) to + // (shl (anyext x), c2-c1). This requires that the bottom c1 bits + // aren't demanded (as above) and that the shifted upper c1 bits of + // x aren't demanded. + if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL && + InnerOp.hasOneUse()) { + if (ConstantSDNode *SA2 = + isConstOrConstSplat(InnerOp.getOperand(1))) { + unsigned InnerShAmt = SA2->getLimitedValue(InnerBits); + if (InnerShAmt < ShAmt && InnerShAmt < InnerBits && + DemandedBits.getActiveBits() <= + (InnerBits - InnerShAmt + ShAmt) && + DemandedBits.countTrailingZeros() >= ShAmt) { + SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, + Op1.getValueType()); + SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, + InnerOp.getOperand(0)); + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA)); + } + } + } + } + + Known.Zero <<= ShAmt; + Known.One <<= ShAmt; + // low bits known zero. 
+ Known.Zero.setLowBits(ShAmt); + } + break; + } + case ISD::SRL: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) { + // If the shift count is an invalid immediate, don't do anything. + if (SA->getAPIntValue().uge(BitWidth)) + break; + + unsigned ShAmt = SA->getZExtValue(); + if (ShAmt == 0) + return TLO.CombineTo(Op, Op0); + + EVT ShiftVT = Op1.getValueType(); + APInt InDemandedMask = (DemandedBits << ShAmt); + + // If the shift is exact, then it does demand the low bits (and knows that + // they are zero). + if (Op->getFlags().hasExact()) + InDemandedMask.setLowBits(ShAmt); + + // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a + // single shift. We can do this if the top bits (which are shifted out) + // are never demanded. + // TODO - support non-uniform vector amounts. + if (Op0.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SA2 = + isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { + if (!DemandedBits.intersects( + APInt::getHighBitsSet(BitWidth, ShAmt))) { + if (SA2->getAPIntValue().ult(BitWidth)) { + unsigned C1 = SA2->getZExtValue(); + unsigned Opc = ISD::SRL; + int Diff = ShAmt - C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SHL; + } + + SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT); + return TLO.CombineTo( + Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA)); + } + } + } + } + + // Compute the new bits that are at the top now. + if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, + Depth + 1)) + return true; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + Known.Zero.lshrInPlace(ShAmt); + Known.One.lshrInPlace(ShAmt); + + Known.Zero.setHighBits(ShAmt); // High bits known zero. + } + break; + } + case ISD::SRA: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + // If this is an arithmetic shift right and only the low-bit is set, we can + // always convert this into a logical shr, even if the shift amount is + // variable. The low bit of the shift cannot be an input sign bit unless + // the shift amount is >= the size of the datatype, which is undefined. + if (DemandedBits.isOneValue()) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); + + if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) { + // If the shift count is an invalid immediate, don't do anything. + if (SA->getAPIntValue().uge(BitWidth)) + break; + + unsigned ShAmt = SA->getZExtValue(); + if (ShAmt == 0) + return TLO.CombineTo(Op, Op0); + + APInt InDemandedMask = (DemandedBits << ShAmt); + + // If the shift is exact, then it does demand the low bits (and knows that + // they are zero). + if (Op->getFlags().hasExact()) + InDemandedMask.setLowBits(ShAmt); + + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. + if (DemandedBits.countLeadingZeros() < ShAmt) + InDemandedMask.setSignBit(); + + if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, + Depth + 1)) + return true; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + Known.Zero.lshrInPlace(ShAmt); + Known.One.lshrInPlace(ShAmt); + + // If the input sign bit is known to be zero, or if none of the top bits + // are demanded, turn this into an unsigned shift right. 
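+      // For example, (sra X, 3) with X known non-negative shifts in only
+      // zeros and is equivalent to (srl X, 3); the same holds when the top 3
+      // bits of the result are never used.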
+ if (Known.Zero[BitWidth - ShAmt - 1] || + DemandedBits.countLeadingZeros() >= ShAmt) { + SDNodeFlags Flags; + Flags.setExact(Op->getFlags().hasExact()); + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags)); + } + + int Log2 = DemandedBits.exactLogBase2(); + if (Log2 >= 0) { + // The bit must come from the sign. + SDValue NewSA = + TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA)); + } + + if (Known.One[BitWidth - ShAmt - 1]) + // New bits are known one. + Known.One.setHighBits(ShAmt); + } + break; + } + case ISD::FSHL: + case ISD::FSHR: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + bool IsFSHL = (Op.getOpcode() == ISD::FSHL); + + if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) { + unsigned Amt = SA->getAPIntValue().urem(BitWidth); + + // For fshl, 0-shift returns the 1st arg. + // For fshr, 0-shift returns the 2nd arg. + if (Amt == 0) { + if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts, + Known, TLO, Depth + 1)) + return true; + break; + } + + // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt)) + // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt) + APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt)); + APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt); + if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO, + Depth + 1)) + return true; + if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO, + Depth + 1)) + return true; + + Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt)); + Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt)); + Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt); + Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt); + Known.One |= Known2.One; + Known.Zero |= Known2.Zero; + } + break; + } + case ISD::BITREVERSE: { + SDValue Src = Op.getOperand(0); + APInt DemandedSrcBits = DemandedBits.reverseBits(); + if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO, + Depth + 1)) + return true; + Known.One = Known2.One.reverseBits(); + Known.Zero = Known2.Zero.reverseBits(); + break; + } + case ISD::SIGN_EXTEND_INREG: { + SDValue Op0 = Op.getOperand(0); + EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + unsigned ExVTBits = ExVT.getScalarSizeInBits(); + + // If we only care about the highest bit, don't bother shifting right. + if (DemandedBits.isSignMask()) { + unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0); + bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1; + // However if the input is already sign extended we expect the sign + // extension to be dropped altogether later and do not simplify. + if (!AlreadySignExtended) { + // Compute the correct shift amount type, which must be getShiftAmountTy + // for scalar types after legalization. + EVT ShiftAmtTy = VT; + if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) + ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL); + + SDValue ShiftAmt = + TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy); + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt)); + } + } + + // If none of the extended bits are demanded, eliminate the sextinreg. + if (DemandedBits.getActiveBits() <= ExVTBits) + return TLO.CombineTo(Op, Op0); + + APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits); + + // Since the sign extended bits are demanded, we know that the sign + // bit is demanded. 
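+    // For example, for (sign_extend_inreg X, i8) with bits 8-15 demanded,
+    // those bits are all copies of bit 7, so bit 7 is added to the demand on
+    // the input.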
+ InputDemandedBits.setBit(ExVTBits - 1); + + if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1)) + return true; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + + // If the input sign bit is known zero, convert this into a zero extension. + if (Known.Zero[ExVTBits - 1]) + return TLO.CombineTo( + Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType())); + + APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits); + if (Known.One[ExVTBits - 1]) { // Input sign bit known set + Known.One.setBitsFrom(ExVTBits); + Known.Zero &= Mask; + } else { // Input sign bit unknown + Known.Zero &= Mask; + Known.One &= Mask; + } + break; + } + case ISD::BUILD_PAIR: { + EVT HalfVT = Op.getOperand(0).getValueType(); + unsigned HalfBitWidth = HalfVT.getScalarSizeInBits(); + + APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth); + APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth); + + KnownBits KnownLo, KnownHi; + + if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1)) + return true; + + if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1)) + return true; + + Known.Zero = KnownLo.Zero.zext(BitWidth) | + KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth); + + Known.One = KnownLo.One.zext(BitWidth) | + KnownHi.One.zext(BitWidth).shl(HalfBitWidth); + break; + } + case ISD::ZERO_EXTEND: + case ISD::ZERO_EXTEND_VECTOR_INREG: { + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + unsigned InBits = SrcVT.getScalarSizeInBits(); + unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG; + + // If none of the top bits are demanded, convert this into an any_extend. + if (DemandedBits.getActiveBits() <= InBits) { + // If we only need the non-extended bits of the bottom element + // then we can just bitcast to the result. + if (IsVecInReg && DemandedElts == 1 && + VT.getSizeInBits() == SrcVT.getSizeInBits() && + TLO.DAG.getDataLayout().isLittleEndian()) + return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); + + unsigned Opc = + IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND; + if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src)); + } + + APInt InDemandedBits = DemandedBits.trunc(InBits); + APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); + if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, + Depth + 1)) + return true; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + assert(Known.getBitWidth() == InBits && "Src width has changed?"); + Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); + break; + } + case ISD::SIGN_EXTEND: + case ISD::SIGN_EXTEND_VECTOR_INREG: { + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + unsigned InBits = SrcVT.getScalarSizeInBits(); + unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG; + + // If none of the top bits are demanded, convert this into an any_extend. + if (DemandedBits.getActiveBits() <= InBits) { + // If we only need the non-extended bits of the bottom element + // then we can just bitcast to the result. 
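+      // For example, on a little-endian target, sign-extending v2i32 in-reg
+      // to v1i64 with only the low 32 bits demanded is just a bitcast: those
+      // bits are source element 0 in both layouts.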
+ if (IsVecInReg && DemandedElts == 1 && + VT.getSizeInBits() == SrcVT.getSizeInBits() && + TLO.DAG.getDataLayout().isLittleEndian()) + return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); + + unsigned Opc = + IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND; + if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src)); + } + + APInt InDemandedBits = DemandedBits.trunc(InBits); + APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); + + // Since some of the sign extended bits are demanded, we know that the sign + // bit is demanded. + InDemandedBits.setBit(InBits - 1); + + if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, + Depth + 1)) + return true; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + assert(Known.getBitWidth() == InBits && "Src width has changed?"); + + // If the sign bit is known one, the top bits match. + Known = Known.sext(BitWidth); + + // If the sign bit is known zero, convert this to a zero extend. + if (Known.isNonNegative()) { + unsigned Opc = + IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND; + if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src)); + } + break; + } + case ISD::ANY_EXTEND: + case ISD::ANY_EXTEND_VECTOR_INREG: { + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + unsigned InBits = SrcVT.getScalarSizeInBits(); + unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG; + + // If we only need the bottom element then we can just bitcast. + // TODO: Handle ANY_EXTEND? + if (IsVecInReg && DemandedElts == 1 && + VT.getSizeInBits() == SrcVT.getSizeInBits() && + TLO.DAG.getDataLayout().isLittleEndian()) + return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); + + APInt InDemandedBits = DemandedBits.trunc(InBits); + APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); + if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, + Depth + 1)) + return true; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + assert(Known.getBitWidth() == InBits && "Src width has changed?"); + Known = Known.zext(BitWidth, false /* => any extend */); + break; + } + case ISD::TRUNCATE: { + SDValue Src = Op.getOperand(0); + + // Simplify the input, using demanded bit information, and compute the known + // zero/one bits live out. + unsigned OperandBitWidth = Src.getScalarValueSizeInBits(); + APInt TruncMask = DemandedBits.zext(OperandBitWidth); + if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1)) + return true; + Known = Known.trunc(BitWidth); + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( + Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc)); + + // If the input is only used by this truncate, see if we can shrink it based + // on the known demanded bits. + if (Src.getNode()->hasOneUse()) { + switch (Src.getOpcode()) { + default: + break; + case ISD::SRL: + // Shrink SRL by a constant if none of the high bits shifted in are + // demanded. + if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT)) + // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is + // undesirable. 
+ break; + + auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1)); + if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth)) + break; + + SDValue Shift = Src.getOperand(1); + uint64_t ShVal = ShAmt->getZExtValue(); + + if (TLO.LegalTypes()) + Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); + + APInt HighBits = + APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); + HighBits.lshrInPlace(ShVal); + HighBits = HighBits.trunc(BitWidth); + + if (!(HighBits & DemandedBits)) { + // None of the shifted in bits are needed. Add a truncate of the + // shift input, then shift it. + SDValue NewTrunc = + TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0)); + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift)); + } + break; + } + } + + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + break; + } + case ISD::AssertZext: { + // AssertZext demands all of the high bits, plus any of the low bits + // demanded by its users. + EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits()); + if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known, + TLO, Depth + 1)) + return true; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + + Known.Zero |= ~InMask; + break; + } + case ISD::EXTRACT_VECTOR_ELT: { + SDValue Src = Op.getOperand(0); + SDValue Idx = Op.getOperand(1); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + unsigned EltBitWidth = Src.getScalarValueSizeInBits(); + + // Demand the bits from every vector element without a constant index. + APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); + if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) + if (CIdx->getAPIntValue().ult(NumSrcElts)) + DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue()); + + // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know + // anything about the extended bits. + APInt DemandedSrcBits = DemandedBits; + if (BitWidth > EltBitWidth) + DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth); + + if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO, + Depth + 1)) + return true; + + Known = Known2; + if (BitWidth > EltBitWidth) + Known = Known.zext(BitWidth, false /* => any extend */); + break; + } + case ISD::BITCAST: { + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits(); + + // If this is an FP->Int bitcast and if the sign bit is the only + // thing demanded, turn this into a FGETSIGN. + if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() && + DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) && + SrcVT.isFloatingPoint()) { + bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT); + bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); + if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 && + SrcVT != MVT::f128) { + // Cannot eliminate/lower SHL for f128 yet. + EVT Ty = OpVTLegal ? VT : MVT::i32; + // Make a FGETSIGN + SHL to move the sign bit into the appropriate + // place. We expect the SHL to be eliminated by other optimizations. 
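The TRUNCATE-of-SRL narrowing completed above is sound because trunc(srl(x, C)) and srl(trunc(x), C) agree on every bit below BitWidth - C; the HighBits test checks that none of the differing bits are demanded. A standalone sketch with a 64-to-32-bit truncation and C = 8 (illustrative constants, not LLVM API):

#include <cassert>
#include <cstdint>
#include <random>

int main() {
  std::mt19937_64 Rng(42);
  const unsigned C = 8;                  // shift amount
  const uint32_t Demanded = 0x00FFFFFFu; // nothing at or above bit 32 - C
  for (int i = 0; i != 100000; ++i) {
    uint64_t X = Rng();
    uint32_t TruncOfShift = (uint32_t)(X >> C); // trunc(srl(x, C))
    uint32_t ShiftOfTrunc = (uint32_t)X >> C;   // srl(trunc(x), C)
    // The two forms differ only in the C bits shifted in from the high
    // half, and those are exactly the bits the mask excludes.
    assert((TruncOfShift & Demanded) == (ShiftOfTrunc & Demanded));
  }
  return 0;
}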
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src); + unsigned OpVTSizeInBits = Op.getValueSizeInBits(); + if (!OpVTLegal && OpVTSizeInBits > 32) + Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign); + unsigned ShVal = Op.getValueSizeInBits() - 1; + SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT); + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt)); + } + } + + // Bitcast from a vector using SimplifyDemanded Bits/VectorElts. + // Demand the elt/bit if any of the original elts/bits are demanded. + // TODO - bigendian once we have test coverage. + if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 && + TLO.DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = BitWidth / NumSrcEltBits; + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != Scale; ++i) { + unsigned Offset = i * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + if (!Sub.isNullValue()) { + DemandedSrcBits |= Sub; + for (unsigned j = 0; j != NumElts; ++j) + if (DemandedElts[j]) + DemandedSrcElts.setBit((j * Scale) + i); + } + } + + APInt KnownSrcUndef, KnownSrcZero; + if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef, + KnownSrcZero, TLO, Depth + 1)) + return true; + + KnownBits KnownSrcBits; + if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, + KnownSrcBits, TLO, Depth + 1)) + return true; + } else if ((NumSrcEltBits % BitWidth) == 0 && + TLO.DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = NumSrcEltBits / BitWidth; + unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) { + unsigned Offset = (i % Scale) * BitWidth; + DemandedSrcBits.insertBits(DemandedBits, Offset); + DemandedSrcElts.setBit(i / Scale); + } + + if (SrcVT.isVector()) { + APInt KnownSrcUndef, KnownSrcZero; + if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef, + KnownSrcZero, TLO, Depth + 1)) + return true; + } + + KnownBits KnownSrcBits; + if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, + KnownSrcBits, TLO, Depth + 1)) + return true; + } + + // If this is a bitcast, let computeKnownBits handle it. Only do this on a + // recursive call where Known may be useful to the caller. + if (Depth > 0) { + Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); + return false; + } + break; + } + case ISD::ADD: + case ISD::MUL: + case ISD::SUB: { + // Add, Sub, and Mul don't demand any bits in positions beyond that + // of the highest bit demanded of them. + SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); + SDNodeFlags Flags = Op.getNode()->getFlags(); + unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros(); + APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); + if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, + Depth + 1) || + SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO, + Depth + 1) || + // See if the operation should be performed at a smaller bit width. + ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) { + if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { + // Disable the nsw and nuw flags. We can no longer guarantee that we + // won't wrap after simplification. 
+ Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + SDValue NewOp = + TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags); + return TLO.CombineTo(Op, NewOp); + } + return true; + } + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = + TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags); + return TLO.CombineTo(Op, NewOp); + } + } + + // If we have a constant operand, we may be able to turn it into -1 if we + // do not demand the high bits. This can make the constant smaller to + // encode, allow more general folding, or match specialized instruction + // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that + // is probably not useful (and could be detrimental). + ConstantSDNode *C = isConstOrConstSplat(Op1); + APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ); + if (C && !C->isAllOnesValue() && !C->isOne() && + (C->getAPIntValue() | HighMask).isAllOnesValue()) { + SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT); + // Disable the nsw and nuw flags. We can no longer guarantee that we + // won't wrap after simplification. + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags); + return TLO.CombineTo(Op, NewOp); + } + + LLVM_FALLTHROUGH; + } + default: + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { + if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts, + Known, TLO, Depth)) + return true; + break; + } + + // Just use computeKnownBits to compute output bits. + Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); + break; + } + + // If we know the value of all of the demanded bits, return this as a + // constant. + if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) { + // Avoid folding to a constant if any OpaqueConstant is involved. + const SDNode *N = Op.getNode(); + for (SDNodeIterator I = SDNodeIterator::begin(N), + E = SDNodeIterator::end(N); + I != E; ++I) { + SDNode *Op = *I; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) + if (C->isOpaque()) + return false; + } + // TODO: Handle float bits as well. + if (VT.isInteger()) + return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT)); + } + + return false; +} + +bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op, + const APInt &DemandedElts, + APInt &KnownUndef, + APInt &KnownZero, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + + bool Simplified = + SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO); + if (Simplified) { + DCI.AddToWorklist(Op.getNode()); + DCI.CommitTargetLoweringOpt(TLO); + } + + return Simplified; +} + +/// Given a vector binary operation and known undefined elements for each input +/// operand, compute whether each element of the output is undefined. 
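The add/sub/mul handling above (both the LoMask narrowing and the fold of a constant into -1) rests on one fact: these ops never let high input bits influence lower output bits, because carries only propagate upward. A standalone check, with the demanded mask and constant chosen purely for illustration:

#include <cassert>
#include <cstdint>
#include <random>

int main() {
  std::mt19937 Rng(7);
  const uint32_t Demanded = 0xFFFFu; // no bit above 15 is demanded
  const uint32_t C = 0x1234FFFFu;    // (C | ~Demanded) is all-ones
  assert((C | ~Demanded) == 0xFFFFFFFFu);
  for (int i = 0; i != 100000; ++i) {
    uint32_t A = Rng(), B = Rng();
    // Bits >= 16 of the inputs never affect bits 0..15 of the result.
    assert((((A & Demanded) + (B & Demanded)) & Demanded) ==
           ((A + B) & Demanded));
    assert((((A & Demanded) * (B & Demanded)) & Demanded) ==
           ((A * B) & Demanded));
    // Hence a constant that is all-ones on the demanded bits behaves
    // exactly like -1 there, which is often cheaper to materialize.
    assert(((A + C) & Demanded) == ((A - 1) & Demanded));
  }
  return 0;
}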
+static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, + const APInt &UndefOp0, + const APInt &UndefOp1) { + EVT VT = BO.getValueType(); + assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() && + "Vector binop only"); + + EVT EltVT = VT.getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + assert(UndefOp0.getBitWidth() == NumElts && + UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis"); + + auto getUndefOrConstantElt = [&](SDValue V, unsigned Index, + const APInt &UndefVals) { + if (UndefVals[Index]) + return DAG.getUNDEF(EltVT); + + if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) { + // Try hard to make sure that the getNode() call is not creating temporary + // nodes. Ignore opaque integers because they do not constant fold. + SDValue Elt = BV->getOperand(Index); + auto *C = dyn_cast<ConstantSDNode>(Elt); + if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque())) + return Elt; + } + + return SDValue(); + }; + + APInt KnownUndef = APInt::getNullValue(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + // If both inputs for this element are either constant or undef and match + // the element type, compute the constant/undef result for this element of + // the vector. + // TODO: Ideally we would use FoldConstantArithmetic() here, but that does + // not handle FP constants. The code within getNode() should be refactored + // to avoid the danger of creating a bogus temporary node here. + SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0); + SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1); + if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT) + if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef()) + KnownUndef.setBit(i); + } + return KnownUndef; +} + +bool TargetLowering::SimplifyDemandedVectorElts( + SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef, + APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth, + bool AssumeSingleUse) const { + EVT VT = Op.getValueType(); + APInt DemandedElts = OriginalDemandedElts; + unsigned NumElts = DemandedElts.getBitWidth(); + assert(VT.isVector() && "Expected vector op"); + assert(VT.getVectorNumElements() == NumElts && + "Mask size mismatches value type element count!"); + + KnownUndef = KnownZero = APInt::getNullValue(NumElts); + + // Undef operand. + if (Op.isUndef()) { + KnownUndef.setAllBits(); + return false; + } + + // If Op has other users, assume that all elements are needed. + if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) + DemandedElts.setAllBits(); + + // Not demanding any elements from Op. + if (DemandedElts == 0) { + KnownUndef.setAllBits(); + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); + } + + // Limit search depth. + if (Depth >= SelectionDAG::MaxRecursionDepth) + return false; + + SDLoc DL(Op); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + + switch (Op.getOpcode()) { + case ISD::SCALAR_TO_VECTOR: { + if (!DemandedElts[0]) { + KnownUndef.setAllBits(); + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); + } + KnownUndef.setHighBits(NumElts - 1); + break; + } + case ISD::BITCAST: { + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // We only handle vectors here. + // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits? + if (!SrcVT.isVector()) + break; + + // Fast handling of 'identity' bitcasts. 
+    unsigned NumSrcElts = SrcVT.getVectorNumElements();
+    if (NumSrcElts == NumElts)
+      return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
+                                        KnownZero, TLO, Depth + 1);
+
+    APInt SrcZero, SrcUndef;
+    APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
+
+    // Bitcast from a 'large element' src vector to a 'small element' vector:
+    // we must demand a source element if any DemandedElt maps to it.
+    if ((NumElts % NumSrcElts) == 0) {
+      unsigned Scale = NumElts / NumSrcElts;
+      for (unsigned i = 0; i != NumElts; ++i)
+        if (DemandedElts[i])
+          SrcDemandedElts.setBit(i / Scale);
+
+      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
+                                     TLO, Depth + 1))
+        return true;
+
+      // Try calling SimplifyDemandedBits, converting demanded elts to the bits
+      // of the large element.
+      // TODO - bigendian once we have test coverage.
+      if (TLO.DAG.getDataLayout().isLittleEndian()) {
+        unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
+        APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
+        for (unsigned i = 0; i != NumElts; ++i)
+          if (DemandedElts[i]) {
+            unsigned Ofs = (i % Scale) * EltSizeInBits;
+            SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
+          }
+
+        KnownBits Known;
+        if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
+          return true;
+      }
+
+      // If a src element is zero/undef, then all the output elements mapping
+      // to it will be as well; only demanded elements are guaranteed to be
+      // correct.
+      for (unsigned i = 0; i != NumSrcElts; ++i) {
+        if (SrcDemandedElts[i]) {
+          if (SrcZero[i])
+            KnownZero.setBits(i * Scale, (i + 1) * Scale);
+          if (SrcUndef[i])
+            KnownUndef.setBits(i * Scale, (i + 1) * Scale);
+        }
+      }
+    }
+
+    // Bitcast from a 'small element' src vector to a 'large element' vector:
+    // we demand all the smaller source elements covered by the larger
+    // demanded element of this vector.
+    if ((NumSrcElts % NumElts) == 0) {
+      unsigned Scale = NumSrcElts / NumElts;
+      for (unsigned i = 0; i != NumElts; ++i)
+        if (DemandedElts[i])
+          SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
+
+      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
+                                     TLO, Depth + 1))
+        return true;
+
+      // If all the src elements covering an output element are zero/undef,
+      // then the output element will be as well, assuming it was demanded.
+      for (unsigned i = 0; i != NumElts; ++i) {
+        if (DemandedElts[i]) {
+          if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
+            KnownZero.setBit(i);
+          if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
+            KnownUndef.setBit(i);
+        }
+      }
+    }
+    break;
+  }
+  case ISD::BUILD_VECTOR: {
+    // Check all elements and simplify any unused elements with UNDEF.
+    if (!DemandedElts.isAllOnesValue()) {
+      // Don't simplify BROADCASTS.
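The scale-index bookkeeping in the BITCAST case above propagates zero/undef lanes across element-size changes: a known-zero wide lane guarantees every narrow lane it covers is zero. A standalone sketch using memcpy as the bitcast (2 x i64 to 4 x i32, lane numbering as in the little-endian case the code handles):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // 2 x i64 where lane 1 is known zero.
  uint64_t Wide[2] = {0x0123456789ABCDEFull, 0};
  uint32_t Narrow[4];
  std::memcpy(Narrow, Wide, sizeof(Wide)); // the bitcast
  // With Scale = 2, wide lane 1 covers narrow lanes 2 and 3, so both are
  // known zero, which is what KnownZero.setBits(i * Scale, ...) records.
  assert(Narrow[2] == 0 && Narrow[3] == 0);
  return 0;
}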
+ if (llvm::any_of(Op->op_values(), + [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) { + SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end()); + bool Updated = false; + for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i] && !Ops[i].isUndef()) { + Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType()); + KnownUndef.setBit(i); + Updated = true; + } + } + if (Updated) + return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops)); + } + } + for (unsigned i = 0; i != NumElts; ++i) { + SDValue SrcOp = Op.getOperand(i); + if (SrcOp.isUndef()) { + KnownUndef.setBit(i); + } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() && + (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) { + KnownZero.setBit(i); + } + } + break; + } + case ISD::CONCAT_VECTORS: { + EVT SubVT = Op.getOperand(0).getValueType(); + unsigned NumSubVecs = Op.getNumOperands(); + unsigned NumSubElts = SubVT.getVectorNumElements(); + for (unsigned i = 0; i != NumSubVecs; ++i) { + SDValue SubOp = Op.getOperand(i); + APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts); + APInt SubUndef, SubZero; + if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO, + Depth + 1)) + return true; + KnownUndef.insertBits(SubUndef, i * NumSubElts); + KnownZero.insertBits(SubZero, i * NumSubElts); + } + break; + } + case ISD::INSERT_SUBVECTOR: { + if (!isa<ConstantSDNode>(Op.getOperand(2))) + break; + SDValue Base = Op.getOperand(0); + SDValue Sub = Op.getOperand(1); + EVT SubVT = Sub.getValueType(); + unsigned NumSubElts = SubVT.getVectorNumElements(); + const APInt &Idx = Op.getConstantOperandAPInt(2); + if (Idx.ugt(NumElts - NumSubElts)) + break; + unsigned SubIdx = Idx.getZExtValue(); + APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx); + APInt SubUndef, SubZero; + if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO, + Depth + 1)) + return true; + APInt BaseElts = DemandedElts; + BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); + + // If none of the base operand elements are demanded, replace it with undef. + if (!BaseElts && !Base.isUndef()) + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, + TLO.DAG.getUNDEF(VT), + Op.getOperand(1), + Op.getOperand(2))); + + if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO, + Depth + 1)) + return true; + KnownUndef.insertBits(SubUndef, SubIdx); + KnownZero.insertBits(SubZero, SubIdx); + break; + } + case ISD::EXTRACT_SUBVECTOR: { + SDValue Src = Op.getOperand(0); + ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { + // Offset the demanded elts by the subvector index. + uint64_t Idx = SubIdx->getZExtValue(); + APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + APInt SrcUndef, SrcZero; + if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO, + Depth + 1)) + return true; + KnownUndef = SrcUndef.extractBits(NumElts, Idx); + KnownZero = SrcZero.extractBits(NumElts, Idx); + } + break; + } + case ISD::INSERT_VECTOR_ELT: { + SDValue Vec = Op.getOperand(0); + SDValue Scl = Op.getOperand(1); + auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + + // For a legal, constant insertion index, if we don't need this insertion + // then strip it, else remove it from the demanded elts. 
+ if (CIdx && CIdx->getAPIntValue().ult(NumElts)) { + unsigned Idx = CIdx->getZExtValue(); + if (!DemandedElts[Idx]) + return TLO.CombineTo(Op, Vec); + + APInt DemandedVecElts(DemandedElts); + DemandedVecElts.clearBit(Idx); + if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef, + KnownZero, TLO, Depth + 1)) + return true; + + KnownUndef.clearBit(Idx); + if (Scl.isUndef()) + KnownUndef.setBit(Idx); + + KnownZero.clearBit(Idx); + if (isNullConstant(Scl) || isNullFPConstant(Scl)) + KnownZero.setBit(Idx); + break; + } + + APInt VecUndef, VecZero; + if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO, + Depth + 1)) + return true; + // Without knowing the insertion index we can't set KnownUndef/KnownZero. + break; + } + case ISD::VSELECT: { + // Try to transform the select condition based on the current demanded + // elements. + // TODO: If a condition element is undef, we can choose from one arm of the + // select (and if one arm is undef, then we can propagate that to the + // result). + // TODO - add support for constant vselect masks (see IR version of this). + APInt UnusedUndef, UnusedZero; + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef, + UnusedZero, TLO, Depth + 1)) + return true; + + // See if we can simplify either vselect operand. + APInt DemandedLHS(DemandedElts); + APInt DemandedRHS(DemandedElts); + APInt UndefLHS, ZeroLHS; + APInt UndefRHS, ZeroRHS; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS, + ZeroLHS, TLO, Depth + 1)) + return true; + if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS, + ZeroRHS, TLO, Depth + 1)) + return true; + + KnownUndef = UndefLHS & UndefRHS; + KnownZero = ZeroLHS & ZeroRHS; + break; + } + case ISD::VECTOR_SHUFFLE: { + ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); + + // Collect demanded elements from shuffle operands.. + APInt DemandedLHS(NumElts, 0); + APInt DemandedRHS(NumElts, 0); + for (unsigned i = 0; i != NumElts; ++i) { + int M = ShuffleMask[i]; + if (M < 0 || !DemandedElts[i]) + continue; + assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range"); + if (M < (int)NumElts) + DemandedLHS.setBit(M); + else + DemandedRHS.setBit(M - NumElts); + } + + // See if we can simplify either shuffle operand. + APInt UndefLHS, ZeroLHS; + APInt UndefRHS, ZeroRHS; + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS, + ZeroLHS, TLO, Depth + 1)) + return true; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS, + ZeroRHS, TLO, Depth + 1)) + return true; + + // Simplify mask using undef elements from LHS/RHS. + bool Updated = false; + bool IdentityLHS = true, IdentityRHS = true; + SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end()); + for (unsigned i = 0; i != NumElts; ++i) { + int &M = NewMask[i]; + if (M < 0) + continue; + if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) || + (M >= (int)NumElts && UndefRHS[M - NumElts])) { + Updated = true; + M = -1; + } + IdentityLHS &= (M < 0) || (M == (int)i); + IdentityRHS &= (M < 0) || ((M - NumElts) == i); + } + + // Update legal shuffle masks based on demanded elements if it won't reduce + // to Identity which can cause premature removal of the shuffle mask. 
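The VECTOR_SHUFFLE handling above first projects the demanded output lanes through the mask to find which input lanes matter, then clears mask entries nobody reads. A standalone sketch of that projection with a hypothetical 4-element mask:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const unsigned NumElts = 4;
  std::vector<int> Mask = {0, 5, 2, 7}; // entries >= NumElts pick from RHS
  uint32_t DemandedOut = 0b0101;        // only output lanes 0 and 2 are used
  uint32_t DemandedLHS = 0, DemandedRHS = 0;
  for (unsigned i = 0; i != NumElts; ++i) {
    int M = Mask[i];
    if (M < 0 || !(DemandedOut & (1u << i)))
      continue; // undef entry or dead lane: demands nothing
    if (M < (int)NumElts)
      DemandedLHS |= 1u << M;
    else
      DemandedRHS |= 1u << (M - NumElts);
  }
  assert(DemandedLHS == 0b0101); // lanes 0 and 2 of the first operand
  assert(DemandedRHS == 0);      // the second operand is not needed at all
  return 0;
}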
+ if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) { + SDValue LegalShuffle = + buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1), + NewMask, TLO.DAG); + if (LegalShuffle) + return TLO.CombineTo(Op, LegalShuffle); + } + + // Propagate undef/zero elements from LHS/RHS. + for (unsigned i = 0; i != NumElts; ++i) { + int M = ShuffleMask[i]; + if (M < 0) { + KnownUndef.setBit(i); + } else if (M < (int)NumElts) { + if (UndefLHS[M]) + KnownUndef.setBit(i); + if (ZeroLHS[M]) + KnownZero.setBit(i); + } else { + if (UndefRHS[M - NumElts]) + KnownUndef.setBit(i); + if (ZeroRHS[M - NumElts]) + KnownZero.setBit(i); + } + } + break; + } + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: { + APInt SrcUndef, SrcZero; + SDValue Src = Op.getOperand(0); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts); + if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO, + Depth + 1)) + return true; + KnownZero = SrcZero.zextOrTrunc(NumElts); + KnownUndef = SrcUndef.zextOrTrunc(NumElts); + + if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG && + Op.getValueSizeInBits() == Src.getValueSizeInBits() && + DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) { + // aext - if we just need the bottom element then we can bitcast. + return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); + } + + if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) { + // zext(undef) upper bits are guaranteed to be zero. + if (DemandedElts.isSubsetOf(KnownUndef)) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); + KnownUndef.clearAllBits(); + } + break; + } + + // TODO: There are more binop opcodes that could be handled here - MUL, MIN, + // MAX, saturated math, etc. + case ISD::OR: + case ISD::XOR: + case ISD::ADD: + case ISD::SUB: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: { + APInt UndefRHS, ZeroRHS; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS, + ZeroRHS, TLO, Depth + 1)) + return true; + APInt UndefLHS, ZeroLHS; + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS, + ZeroLHS, TLO, Depth + 1)) + return true; + + KnownZero = ZeroLHS & ZeroRHS; + KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS); + break; + } + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: + case ISD::ROTL: + case ISD::ROTR: { + APInt UndefRHS, ZeroRHS; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS, + ZeroRHS, TLO, Depth + 1)) + return true; + APInt UndefLHS, ZeroLHS; + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS, + ZeroLHS, TLO, Depth + 1)) + return true; + + KnownZero = ZeroLHS; + KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop? + break; + } + case ISD::MUL: + case ISD::AND: { + APInt SrcUndef, SrcZero; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef, + SrcZero, TLO, Depth + 1)) + return true; + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1)) + return true; + + // If either side has a zero element, then the result element is zero, even + // if the other is an UNDEF. + // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros + // and then handle 'and' nodes with the rest of the binop opcodes. 
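For the shift/rotate cases above, only ZeroLHS feeds KnownZero: a zero lane stays zero under any shift amount, while a zero shift amount proves nothing about the result. A trivial standalone check:

#include <cassert>
#include <cstdint>

int main() {
  // A lane that is zero before the shift is zero after it, whatever the
  // per-lane shift amount is...
  for (unsigned Sh = 0; Sh != 32; ++Sh)
    assert((0u << Sh) == 0 && (0u >> Sh) == 0);
  // ...but a zero amount proves nothing about the result lane, so ZeroRHS
  // is (correctly) ignored when forming KnownZero.
  assert((0xFFu << 0) != 0);
  return 0;
}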
+ KnownZero |= SrcZero; + KnownUndef &= SrcUndef; + KnownUndef &= ~KnownZero; + break; + } + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1)) + return true; + + if (Op.getOpcode() == ISD::ZERO_EXTEND) { + // zext(undef) upper bits are guaranteed to be zero. + if (DemandedElts.isSubsetOf(KnownUndef)) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); + KnownUndef.clearAllBits(); + } + break; + default: { + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { + if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef, + KnownZero, TLO, Depth)) + return true; + } else { + KnownBits Known; + APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits); + if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known, + TLO, Depth, AssumeSingleUse)) + return true; + } + break; + } + } + assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero"); + + // Constant fold all undef cases. + // TODO: Handle zero cases as well. + if (DemandedElts.isSubsetOf(KnownUndef)) + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); + + return false; +} + +/// Determine which of the bits specified in Mask are known to be either zero or +/// one and return them in the Known. +void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use MaskedValueIsZero if you don't know whether Op" + " is a target node!"); + Known.resetAll(); +} + +void TargetLowering::computeKnownBitsForTargetInstr( + GISelKnownBits &Analysis, Register R, KnownBits &Known, + const APInt &DemandedElts, const MachineRegisterInfo &MRI, + unsigned Depth) const { + Known.resetAll(); +} + +void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const { + assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex"); + + if (unsigned Align = DAG.InferPtrAlignment(Op)) { + // The low bits are known zero if the pointer is aligned. + Known.Zero.setLowBits(Log2_32(Align)); + } +} + +/// This method can be implemented by targets that want to expose additional +/// information about sign bits to the DAG Combiner. 
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, + const APInt &, + const SelectionDAG &, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use ComputeNumSignBits if you don't know whether Op" + " is a target node!"); + return 1; +} + +bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode( + SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, + TargetLoweringOpt &TLO, unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use SimplifyDemandedVectorElts if you don't know whether Op" + " is a target node!"); + return false; +} + +bool TargetLowering::SimplifyDemandedBitsForTargetNode( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use SimplifyDemandedBits if you don't know whether Op" + " is a target node!"); + computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth); + return false; +} + +SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const { + assert( + (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op" + " is a target node!"); + return SDValue(); +} + +SDValue +TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, + SDValue N1, MutableArrayRef<int> Mask, + SelectionDAG &DAG) const { + bool LegalMask = isShuffleMaskLegal(Mask, VT); + if (!LegalMask) { + std::swap(N0, N1); + ShuffleVectorSDNode::commuteMask(Mask); + LegalMask = isShuffleMaskLegal(Mask, VT); + } + + if (!LegalMask) + return SDValue(); + + return DAG.getVectorShuffle(VT, DL, N0, N1, Mask); +} + +const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const { + return nullptr; +} + +bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, + const SelectionDAG &DAG, + bool SNaN, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use isKnownNeverNaN if you don't know whether Op" + " is a target node!"); + return false; +} + +// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must +// work with truncating build vectors and vectors with elements of less than +// 8 bits. +bool TargetLowering::isConstTrueVal(const SDNode *N) const { + if (!N) + return false; + + APInt CVal; + if (auto *CN = dyn_cast<ConstantSDNode>(N)) { + CVal = CN->getAPIntValue(); + } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) { + auto *CN = BV->getConstantSplatNode(); + if (!CN) + return false; + + // If this is a truncating build vector, truncate the splat value. 
+ // Otherwise, we may fail to match the expected values below. + unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits(); + CVal = CN->getAPIntValue(); + if (BVEltWidth < CVal.getBitWidth()) + CVal = CVal.trunc(BVEltWidth); + } else { + return false; + } + + switch (getBooleanContents(N->getValueType(0))) { + case UndefinedBooleanContent: + return CVal[0]; + case ZeroOrOneBooleanContent: + return CVal.isOneValue(); + case ZeroOrNegativeOneBooleanContent: + return CVal.isAllOnesValue(); + } + + llvm_unreachable("Invalid boolean contents"); +} + +bool TargetLowering::isConstFalseVal(const SDNode *N) const { + if (!N) + return false; + + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) { + const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); + if (!BV) + return false; + + // Only interested in constant splats, we don't care about undef + // elements in identifying boolean constants and getConstantSplatNode + // returns NULL if all ops are undef; + CN = BV->getConstantSplatNode(); + if (!CN) + return false; + } + + if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent) + return !CN->getAPIntValue()[0]; + + return CN->isNullValue(); +} + +bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, + bool SExt) const { + if (VT == MVT::i1) + return N->isOne(); + + TargetLowering::BooleanContent Cnt = getBooleanContents(VT); + switch (Cnt) { + case TargetLowering::ZeroOrOneBooleanContent: + // An extended value of 1 is always true, unless its original type is i1, + // in which case it will be sign extended to -1. + return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1)); + case TargetLowering::UndefinedBooleanContent: + case TargetLowering::ZeroOrNegativeOneBooleanContent: + return N->isAllOnesValue() && SExt; + } + llvm_unreachable("Unexpected enumeration."); +} + +/// This helper function of SimplifySetCC tries to optimize the comparison when +/// either operand of the SetCC node is a bitwise-and instruction. +SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, const SDLoc &DL, + DAGCombinerInfo &DCI) const { + // Match these patterns in any of their permutations: + // (X & Y) == Y + // (X & Y) != Y + if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND) + std::swap(N0, N1); + + EVT OpVT = N0.getValueType(); + if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() || + (Cond != ISD::SETEQ && Cond != ISD::SETNE)) + return SDValue(); + + SDValue X, Y; + if (N0.getOperand(0) == N1) { + X = N0.getOperand(1); + Y = N0.getOperand(0); + } else if (N0.getOperand(1) == N1) { + X = N0.getOperand(0); + Y = N0.getOperand(1); + } else { + return SDValue(); + } + + SelectionDAG &DAG = DCI.DAG; + SDValue Zero = DAG.getConstant(0, DL, OpVT); + if (DAG.isKnownToBeAPowerOfTwo(Y)) { + // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set. + // Note that where Y is variable and is known to have at most one bit set + // (for example, if it is Z & 1) we cannot do this; the expressions are not + // equivalent when Y == 0. + Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(Cond, N0.getSimpleValueType())) + return DAG.getSetCC(DL, VT, N0, Zero, Cond); + } else if (N0.hasOneUse() && hasAndNotCompare(Y)) { + // If the target supports an 'and-not' or 'and-complement' logic operation, + // try to use that to make a comparison operation more efficient. 
+ // But don't do this transform if the mask is a single bit because there are + // more efficient ways to deal with that case (for example, 'bt' on x86 or + // 'rlwinm' on PPC). + + // Bail out if the compare operand that we want to turn into a zero is + // already a zero (otherwise, infinite loop). + auto *YConst = dyn_cast<ConstantSDNode>(Y); + if (YConst && YConst->isNullValue()) + return SDValue(); + + // Transform this into: ~X & Y == 0. + SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT); + SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y); + return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond); + } + + return SDValue(); +} + +/// There are multiple IR patterns that could be checking whether certain +/// truncation of a signed number would be lossy or not. The pattern which is +/// best at IR level, may not lower optimally. Thus, we want to unfold it. +/// We are looking for the following pattern: (KeptBits is a constant) +/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits) +/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false. +/// KeptBits also can't be 1, that would have been folded to %x dstcond 0 +/// We will unfold it into the natural trunc+sext pattern: +/// ((%x << C) a>> C) dstcond %x +/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x) +SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( + EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI, + const SDLoc &DL) const { + // We must be comparing with a constant. + ConstantSDNode *C1; + if (!(C1 = dyn_cast<ConstantSDNode>(N1))) + return SDValue(); + + // N0 should be: add %x, (1 << (KeptBits-1)) + if (N0->getOpcode() != ISD::ADD) + return SDValue(); + + // And we must be 'add'ing a constant. + ConstantSDNode *C01; + if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1)))) + return SDValue(); + + SDValue X = N0->getOperand(0); + EVT XVT = X.getValueType(); + + // Validate constants ... + + APInt I1 = C1->getAPIntValue(); + + ISD::CondCode NewCond; + if (Cond == ISD::CondCode::SETULT) { + NewCond = ISD::CondCode::SETEQ; + } else if (Cond == ISD::CondCode::SETULE) { + NewCond = ISD::CondCode::SETEQ; + // But need to 'canonicalize' the constant. + I1 += 1; + } else if (Cond == ISD::CondCode::SETUGT) { + NewCond = ISD::CondCode::SETNE; + // But need to 'canonicalize' the constant. + I1 += 1; + } else if (Cond == ISD::CondCode::SETUGE) { + NewCond = ISD::CondCode::SETNE; + } else + return SDValue(); + + APInt I01 = C01->getAPIntValue(); + + auto checkConstants = [&I1, &I01]() -> bool { + // Both of them must be power-of-two, and the constant from setcc is bigger. + return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2(); + }; + + if (checkConstants()) { + // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256 + } else { + // What if we invert constants? (and the target predicate) + I1.negate(); + I01.negate(); + NewCond = getSetCCInverse(NewCond, /*isInteger=*/true); + if (!checkConstants()) + return SDValue(); + // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256 + } + + // They are power-of-two, so which bit is set? + const unsigned KeptBits = I1.logBase2(); + const unsigned KeptBitsMinusOne = I01.logBase2(); + + // Magic! + if (KeptBits != (KeptBitsMinusOne + 1)) + return SDValue(); + assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable"); + + // We don't want to do this in every single case. 
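optimizeSetCCOfSignedTruncationCheck above rewrites the add-and-compare idiom into the documented shift pair. The equivalence can be checked exhaustively at a small width; a standalone sketch for i8 with KeptBits = 4 (assumes the usual arithmetic right shift and two's-complement narrowing):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned KeptBits = 4, C = 8 - KeptBits;
  for (int x = -128; x <= 127; ++x) {
    int8_t X = (int8_t)x;
    // Original form: (add %x, 1 << (KeptBits - 1)) u< (1 << KeptBits).
    bool AddForm = (uint8_t)(X + (1 << (KeptBits - 1))) < (1u << KeptBits);
    // Unfolded form: ((%x << C) a>> C) == %x.
    int8_t Shl = (int8_t)((uint8_t)X << C); // two's-complement wrap assumed
    bool ShiftForm = (int8_t)(Shl >> C) == X;
    // Both mean: X is representable in KeptBits signed bits.
    assert(AddForm == ShiftForm);
  }
  return 0;
}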
+  SelectionDAG &DAG = DCI.DAG;
+  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
+          XVT, KeptBits))
+    return SDValue();
+
+  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
+  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
+
+  // Unfold into:  ((%x << C) a>> C) cond %x
+  // where 'cond' will be either 'eq' or 'ne'.
+  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
+  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
+  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
+  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
+
+  return T2;
+}
+
+// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
+SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
+    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
+    DAGCombinerInfo &DCI, const SDLoc &DL) const {
+  assert(isConstOrConstSplat(N1C) &&
+         isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
+         "Should be a comparison with 0.");
+  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+         "Valid only for [in]equality comparisons.");
+
+  unsigned NewShiftOpcode;
+  SDValue X, C, Y;
+
+  SelectionDAG &DAG = DCI.DAG;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Look for '(C l>>/<< Y)'.
+  auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
+    // The shift should be one-use.
+    if (!V.hasOneUse())
+      return false;
+    unsigned OldShiftOpcode = V.getOpcode();
+    switch (OldShiftOpcode) {
+    case ISD::SHL:
+      NewShiftOpcode = ISD::SRL;
+      break;
+    case ISD::SRL:
+      NewShiftOpcode = ISD::SHL;
+      break;
+    default:
+      return false; // Must be a logical shift.
+    }
+    // We should be shifting a constant.
+    // FIXME: best to use isConstantOrConstantVector().
+    C = V.getOperand(0);
+    ConstantSDNode *CC =
+        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
+    if (!CC)
+      return false;
+    Y = V.getOperand(1);
+
+    ConstantSDNode *XC =
+        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
+    return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
+  };
+
+  // The LHS of the comparison should be a one-use 'and'.
+  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
+    return SDValue();
+
+  X = N0.getOperand(0);
+  SDValue Mask = N0.getOperand(1);
+
+  // 'and' is commutative!
+  if (!Match(Mask)) {
+    std::swap(X, Mask);
+    if (!Match(Mask))
+      return SDValue();
+  }
+
+  EVT VT = X.getValueType();
+
+  // Produce:
+  //  ((X 'OppositeShiftOpcode' Y) & C) Cond 0
+  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
+  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
+  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
+  return T2;
+}
+
+/// Try to fold an equality comparison with an {add/sub/xor} binary operation
+/// as the first operand (N0). Callers are expected to swap the N0/N1
+/// parameters to handle the commuted versions of these patterns.
+SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, const SDLoc &DL, + DAGCombinerInfo &DCI) const { + unsigned BOpcode = N0.getOpcode(); + assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) && + "Unexpected binop"); + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode"); + + // (X + Y) == X --> Y == 0 + // (X - Y) == X --> Y == 0 + // (X ^ Y) == X --> Y == 0 + SelectionDAG &DAG = DCI.DAG; + EVT OpVT = N0.getValueType(); + SDValue X = N0.getOperand(0); + SDValue Y = N0.getOperand(1); + if (X == N1) + return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond); + + if (Y != N1) + return SDValue(); + + // (X + Y) == Y --> X == 0 + // (X ^ Y) == Y --> X == 0 + if (BOpcode == ISD::ADD || BOpcode == ISD::XOR) + return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond); + + // The shift would not be valid if the operands are boolean (i1). + if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1) + return SDValue(); + + // (X - Y) == Y --> X == Y << 1 + EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(), + !DCI.isBeforeLegalize()); + SDValue One = DAG.getConstant(1, DL, ShiftVT); + SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(YShl1.getNode()); + return DAG.getSetCC(DL, VT, X, YShl1, Cond); +} + +/// Try to simplify a setcc built with the specified operands and cc. If it is +/// unable to simplify it, return a null SDValue. +SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, bool foldBooleans, + DAGCombinerInfo &DCI, + const SDLoc &dl) const { + SelectionDAG &DAG = DCI.DAG; + EVT OpVT = N0.getValueType(); + + // Constant fold or commute setcc. + if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl)) + return Fold; + + // Ensure that the constant occurs on the RHS and fold constant comparisons. + // TODO: Handle non-splat vector constants. All undef causes trouble. + ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); + if (isConstOrConstSplat(N0) && + (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) + return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); + + // If we have a subtract with the same 2 non-constant operands as this setcc + // -- but in reverse order -- then try to commute the operands of this setcc + // to match. A matching pair of setcc (cmp) and sub may be combined into 1 + // instruction on some targets. + if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) && + (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) && + DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) && + !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } )) + return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); + + if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); + + // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an + // equality comparison, then we're just comparing whether X itself is + // zero. 
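foldSetCCWithBinOp and the shift-hoisting helper above both rely on small modular-arithmetic identities, all of which can be verified exhaustively at 8 bits. A standalone sketch:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x != 256; ++x)
    for (unsigned y = 0; y != 256; ++y) {
      uint8_t X = (uint8_t)x, Y = (uint8_t)y;
      // foldSetCCWithBinOp, modulo 2^8: cancellation...
      assert(((uint8_t)(X + Y) == X) == (Y == 0));
      assert(((uint8_t)(X ^ Y) == X) == (Y == 0));
      // ...and (X - Y) == Y exactly when X == (Y << 1).
      assert(((uint8_t)(X - Y) == Y) == (X == (uint8_t)(Y << 1)));
    }
  // optimizeSetCCByHoistingAndByConstFromLogicalShift:
  // (X & (C << Y)) == 0  <=>  ((X l>> Y) & C) == 0.
  for (unsigned x = 0; x != 256; ++x)
    for (unsigned c = 0; c != 256; ++c)
      for (unsigned sh = 0; sh != 8; ++sh)
        assert(((x & ((c << sh) & 0xFFu)) == 0) == (((x >> sh) & c) == 0));
  return 0;
}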
+ if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) && + N0.getOperand(0).getOpcode() == ISD::CTLZ && + N0.getOperand(1).getOpcode() == ISD::Constant) { + const APInt &ShAmt = N0.getConstantOperandAPInt(1); + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + ShAmt == Log2_32(N0.getValueSizeInBits())) { + if ((C1 == 0) == (Cond == ISD::SETEQ)) { + // (srl (ctlz x), 5) == 0 -> X != 0 + // (srl (ctlz x), 5) != 1 -> X != 0 + Cond = ISD::SETNE; + } else { + // (srl (ctlz x), 5) != 0 -> X == 0 + // (srl (ctlz x), 5) == 1 -> X == 0 + Cond = ISD::SETEQ; + } + SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); + return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), + Zero, Cond); + } + } + + SDValue CTPOP = N0; + // Look through truncs that don't change the value of a ctpop. + if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE) + CTPOP = N0.getOperand(0); + + if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP && + (N0 == CTPOP || + N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) { + EVT CTVT = CTPOP.getValueType(); + SDValue CTOp = CTPOP.getOperand(0); + + // (ctpop x) u< 2 -> (x & x-1) == 0 + // (ctpop x) u> 1 -> (x & x-1) != 0 + if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ + SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); + SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); + ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; + return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC); + } + + // If ctpop is not supported, expand a power-of-2 comparison based on it. + if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0) + // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) + SDValue Zero = DAG.getConstant(0, dl, CTVT); + SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); + ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true); + SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); + SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond); + SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond); + unsigned LogicOpcode = Cond == ISD::SETEQ ? 
ISD::AND : ISD::OR; + return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS); + } + } + + // (zext x) == C --> x == (trunc C) + // (sext x) == C --> x == (trunc C) + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + DCI.isBeforeLegalize() && N0->hasOneUse()) { + unsigned MinBits = N0.getValueSizeInBits(); + SDValue PreExt; + bool Signed = false; + if (N0->getOpcode() == ISD::ZERO_EXTEND) { + // ZExt + MinBits = N0->getOperand(0).getValueSizeInBits(); + PreExt = N0->getOperand(0); + } else if (N0->getOpcode() == ISD::AND) { + // DAGCombine turns costly ZExts into ANDs + if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) + if ((C->getAPIntValue()+1).isPowerOf2()) { + MinBits = C->getAPIntValue().countTrailingOnes(); + PreExt = N0->getOperand(0); + } + } else if (N0->getOpcode() == ISD::SIGN_EXTEND) { + // SExt + MinBits = N0->getOperand(0).getValueSizeInBits(); + PreExt = N0->getOperand(0); + Signed = true; + } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) { + // ZEXTLOAD / SEXTLOAD + if (LN0->getExtensionType() == ISD::ZEXTLOAD) { + MinBits = LN0->getMemoryVT().getSizeInBits(); + PreExt = N0; + } else if (LN0->getExtensionType() == ISD::SEXTLOAD) { + Signed = true; + MinBits = LN0->getMemoryVT().getSizeInBits(); + PreExt = N0; + } + } + + // Figure out how many bits we need to preserve this constant. + unsigned ReqdBits = Signed ? + C1.getBitWidth() - C1.getNumSignBits() + 1 : + C1.getActiveBits(); + + // Make sure we're not losing bits from the constant. + if (MinBits > 0 && + MinBits < C1.getBitWidth() && + MinBits >= ReqdBits) { + EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits); + if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { + // Will get folded away. + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt); + if (MinBits == 1 && C1 == 1) + // Invert the condition. + return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1), + Cond == ISD::SETEQ ? 
ISD::SETNE : ISD::SETEQ); + SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT); + return DAG.getSetCC(dl, VT, Trunc, C, Cond); + } + + // If truncating the setcc operands is not desirable, we can still + // simplify the expression in some cases: + // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc) + // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc)) + // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc)) + // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc) + // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc)) + // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc) + SDValue TopSetCC = N0->getOperand(0); + unsigned N0Opc = N0->getOpcode(); + bool SExt = (N0Opc == ISD::SIGN_EXTEND); + if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 && + TopSetCC.getOpcode() == ISD::SETCC && + (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) && + (isConstFalseVal(N1C) || + isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) { + + bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) || + (!N1C->isNullValue() && Cond == ISD::SETNE); + + if (!Inverse) + return TopSetCC; + + ISD::CondCode InvCond = ISD::getSetCCInverse( + cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(), + TopSetCC.getOperand(0).getValueType().isInteger()); + return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0), + TopSetCC.getOperand(1), + InvCond); + } + } + } + + // If the LHS is '(and load, const)', the RHS is 0, the test is for + // equality or unsigned, and all 1 bits of the const are in the same + // partial word, see if we can shorten the load. + if (DCI.isBeforeLegalize() && + !ISD::isSignedIntSetCC(Cond) && + N0.getOpcode() == ISD::AND && C1 == 0 && + N0.getNode()->hasOneUse() && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(0).getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); + APInt bestMask; + unsigned bestWidth = 0, bestOffset = 0; + if (Lod->isSimple() && Lod->isUnindexed()) { + unsigned origWidth = N0.getValueSizeInBits(); + unsigned maskWidth = origWidth; + // We can narrow (e.g.) 16-bit extending loads on 32-bit target to + // 8 bits, but have to be careful... 
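The ctpop folds earlier in this function use the classic x & (x - 1) trick, which clears the lowest set bit. An 8-bit exhaustive check of both rewrites:

#include <bitset>
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x != 256; ++x) {
    unsigned Pop = std::bitset<8>(x).count();         // ctpop
    uint8_t X = (uint8_t)x;
    bool LowestCleared = (uint8_t)(X & (X - 1)) == 0; // lowest set bit gone
    assert((Pop < 2) == LowestCleared);               // (ctpop x) u< 2
    assert((Pop == 1) == (X != 0 && LowestCleared));  // (ctpop x) == 1
  }
  return 0;
}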
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD) + origWidth = Lod->getMemoryVT().getSizeInBits(); + const APInt &Mask = N0.getConstantOperandAPInt(1); + for (unsigned width = origWidth / 2; width>=8; width /= 2) { + APInt newMask = APInt::getLowBitsSet(maskWidth, width); + for (unsigned offset=0; offset<origWidth/width; offset++) { + if (Mask.isSubsetOf(newMask)) { + if (DAG.getDataLayout().isLittleEndian()) + bestOffset = (uint64_t)offset * (width/8); + else + bestOffset = (origWidth/width - offset - 1) * (width/8); + bestMask = Mask.lshr(offset * (width/8) * 8); + bestWidth = width; + break; + } + newMask <<= width; + } + } + } + if (bestWidth) { + EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth); + if (newVT.isRound() && + shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) { + EVT PtrType = Lod->getOperand(1).getValueType(); + SDValue Ptr = Lod->getBasePtr(); + if (bestOffset != 0) + Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), + DAG.getConstant(bestOffset, dl, PtrType)); + unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); + SDValue NewLoad = DAG.getLoad( + newVT, dl, Lod->getChain(), Ptr, + Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign); + return DAG.getSetCC(dl, VT, + DAG.getNode(ISD::AND, dl, newVT, NewLoad, + DAG.getConstant(bestMask.trunc(bestWidth), + dl, newVT)), + DAG.getConstant(0LL, dl, newVT), Cond); + } + } + } + + // If the LHS is a ZERO_EXTEND, perform the comparison on the input. + if (N0.getOpcode() == ISD::ZERO_EXTEND) { + unsigned InSize = N0.getOperand(0).getValueSizeInBits(); + + // If the comparison constant has bits in the upper part, the + // zero-extended value could never match. + if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(), + C1.getBitWidth() - InSize))) { + switch (Cond) { + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETEQ: + return DAG.getConstant(0, dl, VT); + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETNE: + return DAG.getConstant(1, dl, VT); + case ISD::SETGT: + case ISD::SETGE: + // True if the sign bit of C1 is set. + return DAG.getConstant(C1.isNegative(), dl, VT); + case ISD::SETLT: + case ISD::SETLE: + // True if the sign bit of C1 isn't set. + return DAG.getConstant(C1.isNonNegative(), dl, VT); + default: + break; + } + } + + // Otherwise, we can perform the comparison with the low bits. + switch (Cond) { + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: + case ISD::SETULE: { + EVT newVT = N0.getOperand(0).getValueType(); + if (DCI.isBeforeLegalizeOps() || + (isOperationLegal(ISD::SETCC, newVT) && + isCondCodeLegal(Cond, newVT.getSimpleVT()))) { + EVT NewSetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT); + SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); + + SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), + NewConst, Cond); + return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType()); + } + break; + } + default: + break; // todo, be more careful with signed comparisons + } + } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); + unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); + EVT ExtDstTy = N0.getValueType(); + unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); + + // If the constant doesn't fit into the number of bits for the source of + // the sign extension, it is impossible for both sides to be equal. 
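The narrowing loop above picks bestOffset so the smaller load reads exactly the bytes the mask covers; on little-endian targets that is offset * (width / 8). A standalone sketch of the 32-bit-AND-to-byte-load case, guarded so it only asserts on a little-endian host:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint16_t Probe = 1;
  uint8_t FirstByte;
  std::memcpy(&FirstByte, &Probe, 1);
  if (FirstByte != 1)
    return 0; // big-endian host: the offset formula differs, so skip
  // (load32 & 0x00FF0000) == 0 narrows to a byte load at offset 2, because
  // the mask covers exactly one aligned byte: bestOffset = offset*(width/8).
  uint32_t Word = 0x12345678u;
  uint8_t Bytes[4];
  std::memcpy(Bytes, &Word, 4); // the in-memory view a narrower load sees
  assert(((Word & 0x00FF0000u) == 0) == (Bytes[2] == 0));
  return 0;
}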
+ if (C1.getMinSignedBits() > ExtSrcTyBits) + return DAG.getConstant(Cond == ISD::SETNE, dl, VT); + + SDValue ZextOp; + EVT Op0Ty = N0.getOperand(0).getValueType(); + if (Op0Ty == ExtSrcTy) { + ZextOp = N0.getOperand(0); + } else { + APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); + ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), + DAG.getConstant(Imm, dl, Op0Ty)); + } + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(ZextOp.getNode()); + // Otherwise, make this a use of a zext. + return DAG.getSetCC(dl, VT, ZextOp, + DAG.getConstant(C1 & APInt::getLowBitsSet( + ExtDstTyBits, + ExtSrcTyBits), + dl, ExtDstTy), + Cond); + } else if ((N1C->isNullValue() || N1C->isOne()) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC + if (N0.getOpcode() == ISD::SETCC && + isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne()); + if (TrueWhenTrue) + return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); + // Invert the condition. + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + CC = ISD::getSetCCInverse(CC, + N0.getOperand(0).getValueType().isInteger()); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); + } + + if ((N0.getOpcode() == ISD::XOR || + (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR && + N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && + isa<ConstantSDNode>(N0.getOperand(1)) && + cast<ConstantSDNode>(N0.getOperand(1))->isOne()) { + // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We + // can only do this if the top bits are known zero. + unsigned BitWidth = N0.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N0, + APInt::getHighBitsSet(BitWidth, + BitWidth-1))) { + // Okay, get the un-inverted input value. + SDValue Val; + if (N0.getOpcode() == ISD::XOR) { + Val = N0.getOperand(0); + } else { + assert(N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR); + // ((X^1)&1)^1 -> X & 1 + Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), + N0.getOperand(0).getOperand(0), + N0.getOperand(1)); + } + + return DAG.getSetCC(dl, VT, Val, N1, + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } + } else if (N1C->isOne() && + (VT == MVT::i1 || + getBooleanContents(N0->getValueType(0)) == + ZeroOrOneBooleanContent)) { + SDValue Op0 = N0; + if (Op0.getOpcode() == ISD::TRUNCATE) + Op0 = Op0.getOperand(0); + + if ((Op0.getOpcode() == ISD::XOR) && + Op0.getOperand(0).getOpcode() == ISD::SETCC && + Op0.getOperand(1).getOpcode() == ISD::SETCC) { + // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) + Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; + return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1), + Cond); + } + if (Op0.getOpcode() == ISD::AND && + isa<ConstantSDNode>(Op0.getOperand(1)) && + cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) { + // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. + if (Op0.getValueType().bitsGT(VT)) + Op0 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)), + DAG.getConstant(1, dl, VT)); + else if (Op0.getValueType().bitsLT(VT)) + Op0 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)), + DAG.getConstant(1, dl, VT)); + + return DAG.getSetCC(dl, VT, Op0, + DAG.getConstant(0, dl, Op0.getValueType()), + Cond == ISD::SETEQ ? 
ISD::SETNE : ISD::SETEQ); + } + if (Op0.getOpcode() == ISD::AssertZext && + cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1) + return DAG.getSetCC(dl, VT, Op0, + DAG.getConstant(0, dl, Op0.getValueType()), + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } + } + + // Given: + // icmp eq/ne (urem %x, %y), 0 + // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem': + // icmp eq/ne %x, 0 + if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0)); + KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1)); + if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond); + } + + if (SDValue V = + optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl)) + return V; + } + + // These simplifications apply to splat vectors as well. + // TODO: Handle more splat vector cases. + if (auto *N1C = isConstOrConstSplat(N1)) { + const APInt &C1 = N1C->getAPIntValue(); + + APInt MinVal, MaxVal; + unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits(); + if (ISD::isSignedIntSetCC(Cond)) { + MinVal = APInt::getSignedMinValue(OperandBitSize); + MaxVal = APInt::getSignedMaxValue(OperandBitSize); + } else { + MinVal = APInt::getMinValue(OperandBitSize); + MaxVal = APInt::getMaxValue(OperandBitSize); + } + + // Canonicalize GE/LE comparisons to use GT/LT comparisons. + if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { + // X >= MIN --> true + if (C1 == MinVal) + return DAG.getBoolConstant(true, dl, VT, OpVT); + + if (!VT.isVector()) { // TODO: Support this for vectors. + // X >= C0 --> X > (C0 - 1) + APInt C = C1 - 1; + ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, dl, N1.getValueType()), + NewCC); + } + } + } + + if (Cond == ISD::SETLE || Cond == ISD::SETULE) { + // X <= MAX --> true + if (C1 == MaxVal) + return DAG.getBoolConstant(true, dl, VT, OpVT); + + // X <= C0 --> X < (C0 + 1) + if (!VT.isVector()) { // TODO: Support this for vectors. + APInt C = C1 + 1; + ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, dl, N1.getValueType()), + NewCC); + } + } + } + + if (Cond == ISD::SETLT || Cond == ISD::SETULT) { + if (C1 == MinVal) + return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false + + // TODO: Support this for vectors after legalize ops. + if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { + // Canonicalize setlt X, Max --> setne X, Max + if (C1 == MaxVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setult X, 1, turn it into seteq X, 0 + if (C1 == MinVal+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MinVal, dl, N0.getValueType()), + ISD::SETEQ); + } + } + + if (Cond == ISD::SETGT || Cond == ISD::SETUGT) { + if (C1 == MaxVal) + return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false + + // TODO: Support this for vectors after legalize ops. 
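+      // For example, for unsigned i8: (setugt X, 0) becomes (setne X, 0), and
+      // (setugt X, 254) becomes (seteq X, 255).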
+ if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { + // Canonicalize setgt X, Min --> setne X, Min + if (C1 == MinVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setugt X, Max-1, turn it into seteq X, Max + if (C1 == MaxVal-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MaxVal, dl, N0.getValueType()), + ISD::SETEQ); + } + } + + if (Cond == ISD::SETEQ || Cond == ISD::SETNE) { + // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 + if (C1.isNullValue()) + if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift( + VT, N0, N1, Cond, DCI, dl)) + return CC; + } + + // If we have "setcc X, C0", check to see if we can shrink the immediate + // by changing cc. + // TODO: Support this for vectors after legalize ops. + if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { + // SETUGT X, SINTMAX -> SETLT X, 0 + if (Cond == ISD::SETUGT && + C1 == APInt::getSignedMaxValue(OperandBitSize)) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(0, dl, N1.getValueType()), + ISD::SETLT); + + // SETULT X, SINTMIN -> SETGT X, -1 + if (Cond == ISD::SETULT && + C1 == APInt::getSignedMinValue(OperandBitSize)) { + SDValue ConstMinusOne = + DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, + N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + } + } + } + + // Back to non-vector simplifications. + // TODO: Can we do these for vector splats? + if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const APInt &C1 = N1C->getAPIntValue(); + EVT ShValTy = N0.getValueType(); + + // Fold bit comparisons when we can. + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) && + N0.getOpcode() == ISD::AND) { + auto &DL = DAG.getDataLayout(); + if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize()); + if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 + // Perform the xform if the AND RHS is a single bit. + unsigned ShCt = AndRHS->getAPIntValue().logBase2(); + if (AndRHS->getAPIntValue().isPowerOf2() && + ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getConstant(ShCt, dl, ShiftTy))); + } + } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { + // (X & 8) == 8 --> (X & 8) >> 3 + // Perform the xform if C1 is a single bit. 
+ unsigned ShCt = C1.logBase2(); + if (C1.isPowerOf2() && + ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getConstant(ShCt, dl, ShiftTy))); + } + } + } + } + + if (C1.getMinSignedBits() <= 64 && + !isLegalICmpImmediate(C1.getSExtValue())) { + // (X & -256) == 256 -> (X >> 8) == 1 + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + N0.getOpcode() == ISD::AND && N0.hasOneUse()) { + if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + const APInt &AndRHSC = AndRHS->getAPIntValue(); + if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { + unsigned ShiftBits = AndRHSC.countTrailingZeros(); + auto &DL = DAG.getDataLayout(); + EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, + !DCI.isBeforeLegalize()); + EVT CmpTy = N0.getValueType(); + SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), + DAG.getConstant(ShiftBits, dl, + ShiftTy)); + SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); + } + } + } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE || + Cond == ISD::SETULE || Cond == ISD::SETUGT) { + bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT); + // X < 0x100000000 -> (X >> 32) < 1 + // X >= 0x100000000 -> (X >> 32) >= 1 + // X <= 0x0ffffffff -> (X >> 32) < 1 + // X > 0x0ffffffff -> (X >> 32) >= 1 + unsigned ShiftBits; + APInt NewC = C1; + ISD::CondCode NewCond = Cond; + if (AdjOne) { + ShiftBits = C1.countTrailingOnes(); + NewC = NewC + 1; + NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; + } else { + ShiftBits = C1.countTrailingZeros(); + } + NewC.lshrInPlace(ShiftBits); + if (ShiftBits && NewC.getMinSignedBits() <= 64 && + isLegalICmpImmediate(NewC.getSExtValue())) { + auto &DL = DAG.getDataLayout(); + EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, + !DCI.isBeforeLegalize()); + EVT CmpTy = N0.getValueType(); + SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, + DAG.getConstant(ShiftBits, dl, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); + } + } + } + } + + if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) { + auto *CFP = cast<ConstantFPSDNode>(N1); + assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value"); + + // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the + // constant if knowing that the operand is non-nan is enough. We prefer to + // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to + // materialize 0.0. + if (Cond == ISD::SETO || Cond == ISD::SETUO) + return DAG.getSetCC(dl, VT, N0, N0, Cond); + + // setcc (fneg x), C -> setcc swap(pred) x, -C + if (N0.getOpcode() == ISD::FNEG) { + ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(SwapCond, N0.getSimpleValueType())) { + SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1); + return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond); + } + } + + // If the condition is not legal, see if we can find an equivalent one + // which is legal. + if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) { + // If the comparison was an awkward floating-point == or != and one of + // the comparison operands is infinity or negative infinity, convert the + // condition to a less-awkward <= or >=. 
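+      // For example, (setoeq X, +Inf) is equivalent to (setoge X, +Inf), since
+      // no value compares ordered-greater-than +Inf.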
+      if (CFP->getValueAPF().isInfinity()) {
+        if (CFP->getValueAPF().isNegative()) {
+          if (Cond == ISD::SETOEQ &&
+              isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
+          if (Cond == ISD::SETUEQ &&
+              isCondCodeLegal(ISD::SETULE, N0.getSimpleValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
+          if (Cond == ISD::SETUNE &&
+              isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
+          if (Cond == ISD::SETONE &&
+              isCondCodeLegal(ISD::SETOGT, N0.getSimpleValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
+        } else {
+          if (Cond == ISD::SETOEQ &&
+              isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
+          if (Cond == ISD::SETUEQ &&
+              isCondCodeLegal(ISD::SETUGE, N0.getSimpleValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
+          if (Cond == ISD::SETUNE &&
+              isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
+          if (Cond == ISD::SETONE &&
+              isCondCodeLegal(ISD::SETOLT, N0.getSimpleValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
+        }
+      }
+    }
+  }
+
+  if (N0 == N1) {
+    // The sext(setcc()) => setcc() optimization relies on the appropriate
+    // constant being emitted.
+    assert(!N0.getValueType().isInteger() &&
+           "Integer types should be handled by FoldSetCC");
+
+    bool EqTrue = ISD::isTrueWhenEqual(Cond);
+    unsigned UOF = ISD::getUnorderedFlavor(Cond);
+    if (UOF == 2) // FP operators that are undefined on NaNs.
+      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
+    if (UOF == unsigned(EqTrue))
+      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
+    // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
+    // if it is not already.
+    ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+    if (NewCond != Cond &&
+        (DCI.isBeforeLegalizeOps() ||
+         isCondCodeLegal(NewCond, N0.getSimpleValueType())))
+      return DAG.getSetCC(dl, VT, N0, N1, NewCond);
+  }
+
+  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+      N0.getValueType().isInteger()) {
+    if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+        N0.getOpcode() == ISD::XOR) {
+      // Simplify (X+Y) == (X+Z) --> Y == Z
+      if (N0.getOpcode() == N1.getOpcode()) {
+        if (N0.getOperand(0) == N1.getOperand(0))
+          return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
+        if (N0.getOperand(1) == N1.getOperand(1))
+          return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+        if (isCommutativeBinOp(N0.getOpcode())) {
+          // If X op Y == Y op X, try other combinations.
+          if (N0.getOperand(0) == N1.getOperand(1))
+            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+                                Cond);
+          if (N0.getOperand(1) == N1.getOperand(0))
+            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+                                Cond);
+        }
+      }
+
+      // If RHS is a legal immediate value for a compare instruction, we need
+      // to be careful about increasing register pressure needlessly.
+      bool LegalRHSImm = false;
+
+      if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+        if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+          // Turn (X+C1) == C2 --> X == C2-C1
+          if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
+            return DAG.getSetCC(dl, VT, N0.getOperand(0),
+                                DAG.getConstant(RHSC->getAPIntValue()-
+                                                LHSR->getAPIntValue(),
+                                                dl, N0.getValueType()), Cond);
+          }
+
+          // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
+ if (N0.getOpcode() == ISD::XOR) + // If we know that all of the inverted bits are zero, don't bother + // performing the inversion. + if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue())) + return + DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(LHSR->getAPIntValue() ^ + RHSC->getAPIntValue(), + dl, N0.getValueType()), + Cond); + } + + // Turn (C1-X) == C2 --> X == C1-C2 + if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { + if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { + return + DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(SUBC->getAPIntValue() - + RHSC->getAPIntValue(), + dl, N0.getValueType()), + Cond); + } + } + + // Could RHSC fold directly into a compare? + if (RHSC->getValueType(0).getSizeInBits() <= 64) + LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue()); + } + + // (X+Y) == X --> Y == 0 and similar folds. + // Don't do this if X is an immediate that can fold into a cmp + // instruction and X+Y has other uses. It could be an induction variable + // chain, and the transform would increase register pressure. + if (!LegalRHSImm || N0.hasOneUse()) + if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI)) + return V; + } + + if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB || + N1.getOpcode() == ISD::XOR) + if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI)) + return V; + + if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI)) + return V; + } + + // Fold remainder of division by a constant. + if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) && + N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + + // When division is cheap or optimizing for minimum size, + // fall through to DIVREM creation by skipping this fold. + if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) { + if (N0.getOpcode() == ISD::UREM) { + if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) + return Folded; + } else if (N0.getOpcode() == ISD::SREM) { + if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl)) + return Folded; + } + } + } + + // Fold away ALL boolean setcc's. 
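+  // On i1 operands every comparison reduces to basic logic; e.g. (setult X, Y)
+  // holds only for X == 0, Y == 1, which is exactly (~X) & Y below.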
+ if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) { + SDValue Temp; + switch (Cond) { + default: llvm_unreachable("Unknown integer setcc!"); + case ISD::SETEQ: // X == Y -> ~(X^Y) + Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1); + N0 = DAG.getNOT(dl, Temp, OpVT); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETNE: // X != Y --> (X^Y) + N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1); + break; + case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y + case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y + Temp = DAG.getNOT(dl, N0, OpVT); + N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X + case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X + Temp = DAG.getNOT(dl, N1, OpVT); + N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y + case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y + Temp = DAG.getNOT(dl, N0, OpVT); + N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X + case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X + Temp = DAG.getNOT(dl, N1, OpVT); + N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp); + break; + } + if (VT.getScalarType() != MVT::i1) { + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(N0.getNode()); + // FIXME: If running after legalize, we probably can't do this. + ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT)); + N0 = DAG.getNode(ExtendCode, dl, VT, N0); + } + return N0; + } + + // Could not fold it. + return SDValue(); +} + +/// Returns true (and the GlobalValue and the offset) if the node is a +/// GlobalAddress + offset. +bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA, + int64_t &Offset) const { + + SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode(); + + if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) { + GA = GASD->getGlobal(); + Offset += GASD->getOffset(); + return true; + } + + if (N->getOpcode() == ISD::ADD) { + SDValue N1 = N->getOperand(0); + SDValue N2 = N->getOperand(1); + if (isGAPlusOffset(N1.getNode(), GA, Offset)) { + if (auto *V = dyn_cast<ConstantSDNode>(N2)) { + Offset += V->getSExtValue(); + return true; + } + } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) { + if (auto *V = dyn_cast<ConstantSDNode>(N1)) { + Offset += V->getSExtValue(); + return true; + } + } + } + + return false; +} + +SDValue TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + // Default implementation: no optimization. 
+ return SDValue(); +} + +//===----------------------------------------------------------------------===// +// Inline Assembler Implementation Methods +//===----------------------------------------------------------------------===// + +TargetLowering::ConstraintType +TargetLowering::getConstraintType(StringRef Constraint) const { + unsigned S = Constraint.size(); + + if (S == 1) { + switch (Constraint[0]) { + default: break; + case 'r': + return C_RegisterClass; + case 'm': // memory + case 'o': // offsetable + case 'V': // not offsetable + return C_Memory; + case 'n': // Simple Integer + case 'E': // Floating Point Constant + case 'F': // Floating Point Constant + return C_Immediate; + case 'i': // Simple Integer or Relocatable Constant + case 's': // Relocatable Constant + case 'p': // Address. + case 'X': // Allow ANY value. + case 'I': // Target registers. + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case '<': + case '>': + return C_Other; + } + } + + if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') { + if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}" + return C_Memory; + return C_Register; + } + return C_Unknown; +} + +/// Try to replace an X constraint, which matches anything, with another that +/// has more specific requirements based on the type of the corresponding +/// operand. +const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const { + if (ConstraintVT.isInteger()) + return "r"; + if (ConstraintVT.isFloatingPoint()) + return "f"; // works for many targets + return nullptr; +} + +SDValue TargetLowering::LowerAsmOutputForConstraint( + SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo, + SelectionDAG &DAG) const { + return SDValue(); +} + +/// Lower the specified operand into the Ops vector. +/// If it is invalid, don't add anything to Ops. +void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { + + if (Constraint.length() > 1) return; + + char ConstraintLetter = Constraint[0]; + switch (ConstraintLetter) { + default: break; + case 'X': // Allows any operand; labels (basic block) use this. + if (Op.getOpcode() == ISD::BasicBlock || + Op.getOpcode() == ISD::TargetBlockAddress) { + Ops.push_back(Op); + return; + } + LLVM_FALLTHROUGH; + case 'i': // Simple Integer or Relocatable Constant + case 'n': // Simple Integer + case 's': { // Relocatable Constant + + GlobalAddressSDNode *GA; + ConstantSDNode *C; + BlockAddressSDNode *BA; + uint64_t Offset = 0; + + // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C), + // etc., since getelementpointer is variadic. We can't use + // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible + // while in this case the GA may be furthest from the root node which is + // likely an ISD::ADD. + while (1) { + if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') { + Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op), + GA->getValueType(0), + Offset + GA->getOffset())); + return; + } else if ((C = dyn_cast<ConstantSDNode>(Op)) && + ConstraintLetter != 's') { + // gcc prints these as sign extended. Sign extend value to 64 bits + // now; without this it would get ZExt'd later in + // ScheduleDAGSDNodes::EmitNode, which is very generic. + bool IsBool = C->getConstantIntValue()->getBitWidth() == 1; + BooleanContent BCont = getBooleanContents(MVT::i64); + ISD::NodeType ExtOpc = IsBool ? 
getExtendForContent(BCont)
+                                    : ISD::SIGN_EXTEND;
+      int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue()
+                                                  : C->getSExtValue();
+      Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
+                                          SDLoc(C), MVT::i64));
+      return;
+    } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) &&
+               ConstraintLetter != 'n') {
+      Ops.push_back(DAG.getTargetBlockAddress(
+          BA->getBlockAddress(), BA->getValueType(0),
+          Offset + BA->getOffset(), BA->getTargetFlags()));
+      return;
+    } else {
+      const unsigned OpCode = Op.getOpcode();
+      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
+        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
+          Op = Op.getOperand(1);
+        // Subtraction is not commutative.
+        else if (OpCode == ISD::ADD &&
+                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
+          Op = Op.getOperand(0);
+        else
+          return;
+        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
+        continue;
+      }
+    }
+    return;
+  }
+  break;
+  }
+  }
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
+                                             StringRef Constraint,
+                                             MVT VT) const {
+  if (Constraint.empty() || Constraint[0] != '{')
+    return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
+  assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
+
+  // Remove the braces from around the name.
+  StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
+
+  std::pair<unsigned, const TargetRegisterClass *> R =
+      std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
+
+  // Figure out which register class contains this reg.
+  for (const TargetRegisterClass *RC : RI->regclasses()) {
+    // If none of the value types for this register class are valid, we
+    // can't use it. For example, 64-bit reg classes on 32-bit targets.
+    if (!isLegalRC(*RI, *RC))
+      continue;
+
+    for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+         I != E; ++I) {
+      if (RegName.equals_lower(RI->getRegAsmName(*I))) {
+        std::pair<unsigned, const TargetRegisterClass *> S =
+            std::make_pair(*I, RC);
+
+        // If this register class has the requested value type, return it,
+        // otherwise keep searching and return the first class found
+        // if no other is found which explicitly has the requested type.
+        if (RI->isTypeLegalForClass(*RC, VT))
+          return S;
+        if (!R.second)
+          R = S;
+      }
+    }
+  }
+
+  return R;
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+
+/// Return true if this is an input operand that is a matching constraint
+/// like "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+  assert(!ConstraintCode.empty() && "No known constraint!");
+  return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
+}
+
+/// If this is an input matching constraint, this method returns the output
+/// operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+  assert(!ConstraintCode.empty() && "No known constraint!");
+  return atoi(ConstraintCode.c_str());
+}
+
+/// Split up the constraint string from the inline assembly value into the
+/// specific constraints and their prefixes, and also tie in the associated
+/// operand values.
+/// If this returns an empty vector, and if the constraint string itself
+/// isn't empty, there was an error parsing.
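+/// For example, the constraint string "=r,r,ri" yields three operand infos:
+/// a register output and two inputs, the second of which may be either a
+/// register or an immediate.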
+TargetLowering::AsmOperandInfoVector +TargetLowering::ParseConstraints(const DataLayout &DL, + const TargetRegisterInfo *TRI, + ImmutableCallSite CS) const { + /// Information about all of the constraints. + AsmOperandInfoVector ConstraintOperands; + const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + unsigned maCount = 0; // Largest number of multiple alternative constraints. + + // Do a prepass over the constraints, canonicalizing them, and building up the + // ConstraintOperands list. + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. + unsigned ResNo = 0; // ResNo - The result number of the next output. + + for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) { + ConstraintOperands.emplace_back(std::move(CI)); + AsmOperandInfo &OpInfo = ConstraintOperands.back(); + + // Update multiple alternative constraint count. + if (OpInfo.multipleAlternatives.size() > maCount) + maCount = OpInfo.multipleAlternatives.size(); + + OpInfo.ConstraintVT = MVT::Other; + + // Compute the value type for each operand. + switch (OpInfo.Type) { + case InlineAsm::isOutput: + // Indirect outputs just consume an argument. + if (OpInfo.isIndirect) { + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + } + + // The return value of the call is this value. As such, there is no + // corresponding argument. + assert(!CS.getType()->isVoidTy() && + "Bad inline asm!"); + if (StructType *STy = dyn_cast<StructType>(CS.getType())) { + OpInfo.ConstraintVT = + getSimpleValueType(DL, STy->getElementType(ResNo)); + } else { + assert(ResNo == 0 && "Asm only has one result!"); + OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType()); + } + ++ResNo; + break; + case InlineAsm::isInput: + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + case InlineAsm::isClobber: + // Nothing to do. + break; + } + + if (OpInfo.CallOperandVal) { + llvm::Type *OpTy = OpInfo.CallOperandVal->getType(); + if (OpInfo.isIndirect) { + llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); + if (!PtrTy) + report_fatal_error("Indirect operand for inline asm not a pointer!"); + OpTy = PtrTy->getElementType(); + } + + // Look for vector wrapped in a struct. e.g. { <16 x i8> }. + if (StructType *STy = dyn_cast<StructType>(OpTy)) + if (STy->getNumElements() == 1) + OpTy = STy->getElementType(0); + + // If OpTy is not a single value, it may be a struct/union that we + // can tile with integers. + if (!OpTy->isSingleValueType() && OpTy->isSized()) { + unsigned BitSize = DL.getTypeSizeInBits(OpTy); + switch (BitSize) { + default: break; + case 1: + case 8: + case 16: + case 32: + case 64: + case 128: + OpInfo.ConstraintVT = + MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true); + break; + } + } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) { + unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace()); + OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize); + } else { + OpInfo.ConstraintVT = MVT::getVT(OpTy, true); + } + } + } + + // If we have multiple alternative constraints, select the best alternative. + if (!ConstraintOperands.empty()) { + if (maCount) { + unsigned bestMAIndex = 0; + int bestWeight = -1; + // weight: -1 = invalid match, and 0 = so-so match to 5 = good match. + int weight = -1; + unsigned maIndex; + // Compute the sums of the weights for each alternative, keeping track + // of the best (highest weight) one so far. 
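+      // For example, if two operands carry the alternatives "r,m" and "m,r",
+      // alternative 0 is scored as (register, memory) and alternative 1 as
+      // (memory, register); the higher total wins, with ties keeping the
+      // earlier alternative.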
+      for (maIndex = 0; maIndex < maCount; ++maIndex) {
+        int weightSum = 0;
+        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+             cIndex != eIndex; ++cIndex) {
+          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
+          if (OpInfo.Type == InlineAsm::isClobber)
+            continue;
+
+          // If this is an output operand with a matching input operand,
+          // look up the matching input. If their types mismatch, e.g. one
+          // is an integer, the other is floating point, or their sizes are
+          // different, flag it as maCantMatch.
+          if (OpInfo.hasMatchingInput()) {
+            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+              if ((OpInfo.ConstraintVT.isInteger() !=
+                   Input.ConstraintVT.isInteger()) ||
+                  (OpInfo.ConstraintVT.getSizeInBits() !=
+                   Input.ConstraintVT.getSizeInBits())) {
+                weightSum = -1; // Can't match.
+                break;
+              }
+            }
+          }
+          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
+          if (weight == -1) {
+            weightSum = -1;
+            break;
+          }
+          weightSum += weight;
+        }
+        // Update best.
+        if (weightSum > bestWeight) {
+          bestWeight = weightSum;
+          bestMAIndex = maIndex;
+        }
+      }
+
+      // Now select chosen alternative in each constraint.
+      for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+           cIndex != eIndex; ++cIndex) {
+        AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
+        if (cInfo.Type == InlineAsm::isClobber)
+          continue;
+        cInfo.selectAlternative(bestMAIndex);
+      }
+    }
+  }
+
+  // Check and hook up tied operands, choose constraint code to use.
+  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+       cIndex != eIndex; ++cIndex) {
+    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
+
+    // If this is an output operand with a matching input operand, look up the
+    // matching input. If their types mismatch, e.g. one is an integer, the
+    // other is floating point, or their sizes are different, flag it as an
+    // error.
+    if (OpInfo.hasMatchingInput()) {
+      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
+            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
+                                         OpInfo.ConstraintVT);
+        std::pair<unsigned, const TargetRegisterClass *> InputRC =
+            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
+                                         Input.ConstraintVT);
+        if ((OpInfo.ConstraintVT.isInteger() !=
+             Input.ConstraintVT.isInteger()) ||
+            (MatchRC.second != InputRC.second)) {
+          report_fatal_error("Unsupported asm: input constraint"
+                             " with a matching output constraint of"
+                             " incompatible type!");
+        }
+      }
+    }
+  }
+
+  return ConstraintOperands;
+}
+
+/// Return an integer indicating how general CT is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+  switch (CT) {
+  case TargetLowering::C_Immediate:
+  case TargetLowering::C_Other:
+  case TargetLowering::C_Unknown:
+    return 0;
+  case TargetLowering::C_Register:
+    return 1;
+  case TargetLowering::C_RegisterClass:
+    return 2;
+  case TargetLowering::C_Memory:
+    return 3;
+  }
+  llvm_unreachable("Invalid constraint type");
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+  TargetLowering::getMultipleConstraintMatchWeight(
+    AsmOperandInfo &info, int maIndex) const {
+  InlineAsm::ConstraintCodeVector *rCodes;
+  if (maIndex >= (int)info.multipleAlternatives.size())
+    rCodes = &info.Codes;
+  else
+    rCodes = &info.multipleAlternatives[maIndex].Codes;
+  ConstraintWeight BestWeight = CW_Invalid;
+
+  // Loop over the options, keeping track of the most general one.
+  for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
+    ConstraintWeight weight =
+        getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
+    if (weight > BestWeight)
+      BestWeight = weight;
+  }
+
+  return BestWeight;
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+  TargetLowering::getSingleConstraintMatchWeight(
+    AsmOperandInfo &info, const char *constraint) const {
+  ConstraintWeight weight = CW_Invalid;
+  Value *CallOperandVal = info.CallOperandVal;
+  // If we don't have a value, we can't do a match,
+  // but allow it at the lowest weight.
+  if (!CallOperandVal)
+    return CW_Default;
+  // Look at the constraint type.
+  switch (*constraint) {
+  case 'i': // immediate integer.
+  case 'n': // immediate integer with a known value.
+    if (isa<ConstantInt>(CallOperandVal))
+      weight = CW_Constant;
+    break;
+  case 's': // non-explicit integral immediate.
+    if (isa<GlobalValue>(CallOperandVal))
+      weight = CW_Constant;
+    break;
+  case 'E': // immediate float if host format.
+  case 'F': // immediate float.
+    if (isa<ConstantFP>(CallOperandVal))
+      weight = CW_Constant;
+    break;
+  case '<': // memory operand with autodecrement.
+  case '>': // memory operand with autoincrement.
+  case 'm': // memory operand.
+  case 'o': // offsettable memory operand
+  case 'V': // non-offsettable memory operand
+    weight = CW_Memory;
+    break;
+  case 'r': // general register.
+  case 'g': // general register, memory operand or immediate integer.
+            // note: Clang converts "g" to "imr".
+    if (CallOperandVal->getType()->isIntegerTy())
+      weight = CW_Register;
+    break;
+  case 'X': // any operand.
+  default:
+    weight = CW_Default;
+    break;
+  }
+  return weight;
+}
+
+/// If there are multiple different constraints that we could pick for this
+/// operand (e.g. "imr") try to pick the 'best' one.
+/// This is somewhat tricky: constraints fall into four classes:
+///    Other         -> immediates and magic values
+///    Register      -> one specific register
+///    RegisterClass -> a group of regs
+///    Memory        -> memory
+/// Ideally, we would pick the most specific constraint possible: if we have
+/// something that fits into a register, we would pick it. The problem here
+/// is that if we have something that could either be in a register or in
+/// memory that use of the register could cause selection of *other*
+/// operands to fail: they might only succeed if we pick memory. Because of
+/// this the heuristic we use is:
+///
+///  1) If there is an 'other' constraint, and if the operand is valid for
+///     that constraint, use it. This makes us take advantage of 'i'
+///     constraints when available.
+///  2) Otherwise, pick the most general constraint present. This prefers
+///     'm' over 'r', for example.
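+/// Thus, given "imr" and an operand that is a known constant integer, rule 1
+/// picks 'i'; otherwise rule 2 picks 'm', since memory is ranked as more
+/// general than a register class.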
+/// +static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, + const TargetLowering &TLI, + SDValue Op, SelectionDAG *DAG) { + assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options"); + unsigned BestIdx = 0; + TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown; + int BestGenerality = -1; + + // Loop over the options, keeping track of the most general one. + for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) { + TargetLowering::ConstraintType CType = + TLI.getConstraintType(OpInfo.Codes[i]); + + // If this is an 'other' or 'immediate' constraint, see if the operand is + // valid for it. For example, on X86 we might have an 'rI' constraint. If + // the operand is an integer in the range [0..31] we want to use I (saving a + // load of a register), otherwise we must use 'r'. + if ((CType == TargetLowering::C_Other || + CType == TargetLowering::C_Immediate) && Op.getNode()) { + assert(OpInfo.Codes[i].size() == 1 && + "Unhandled multi-letter 'other' constraint"); + std::vector<SDValue> ResultOps; + TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i], + ResultOps, *DAG); + if (!ResultOps.empty()) { + BestType = CType; + BestIdx = i; + break; + } + } + + // Things with matching constraints can only be registers, per gcc + // documentation. This mainly affects "g" constraints. + if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput()) + continue; + + // This constraint letter is more general than the previous one, use it. + int Generality = getConstraintGenerality(CType); + if (Generality > BestGenerality) { + BestType = CType; + BestIdx = i; + BestGenerality = Generality; + } + } + + OpInfo.ConstraintCode = OpInfo.Codes[BestIdx]; + OpInfo.ConstraintType = BestType; +} + +/// Determines the constraint code and constraint type to use for the specific +/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. +void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, + SDValue Op, + SelectionDAG *DAG) const { + assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); + + // Single-letter constraints ('r') are very common. + if (OpInfo.Codes.size() == 1) { + OpInfo.ConstraintCode = OpInfo.Codes[0]; + OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); + } else { + ChooseConstraint(OpInfo, *this, Op, DAG); + } + + // 'X' matches anything. + if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) { + // Labels and constants are handled elsewhere ('X' is the only thing + // that matches labels). For Functions, the type here is the type of + // the result, which is not what we want to look at; leave them alone. + Value *v = OpInfo.CallOperandVal; + if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) { + OpInfo.CallOperandVal = v; + return; + } + + if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress) + return; + + // Otherwise, try to resolve it to something we know about by looking at + // the actual operand type. + if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) { + OpInfo.ConstraintCode = Repl; + OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); + } + } +} + +/// Given an exact SDIV by a constant, create a multiplication +/// with the multiplicative inverse of the constant. 
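+/// For example, an exact sdiv by 6 on i32 becomes an exact ashr by 1 followed
+/// by a mul by 0xAAAAAAAB, the multiplicative inverse of 3 modulo 2^32.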
+static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl<SDNode *> &Created) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + EVT VT = N->getValueType(0); + EVT SVT = VT.getScalarType(); + EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); + + bool UseSRA = false; + SmallVector<SDValue, 16> Shifts, Factors; + + auto BuildSDIVPattern = [&](ConstantSDNode *C) { + if (C->isNullValue()) + return false; + APInt Divisor = C->getAPIntValue(); + unsigned Shift = Divisor.countTrailingZeros(); + if (Shift) { + Divisor.ashrInPlace(Shift); + UseSRA = true; + } + // Calculate the multiplicative inverse, using Newton's method. + APInt t; + APInt Factor = Divisor; + while ((t = Divisor * Factor) != 1) + Factor *= APInt(Divisor.getBitWidth(), 2) - t; + Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT)); + Factors.push_back(DAG.getConstant(Factor, dl, SVT)); + return true; + }; + + // Collect all magic values from the build vector. + if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern)) + return SDValue(); + + SDValue Shift, Factor; + if (VT.isVector()) { + Shift = DAG.getBuildVector(ShVT, dl, Shifts); + Factor = DAG.getBuildVector(VT, dl, Factors); + } else { + Shift = Shifts[0]; + Factor = Factors[0]; + } + + SDValue Res = Op0; + + // Shift the value upfront if it is even, so the LSB is one. + if (UseSRA) { + // TODO: For UDIV use SRL instead of SRA. + SDNodeFlags Flags; + Flags.setExact(true); + Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags); + Created.push_back(Res.getNode()); + } + + return DAG.getNode(ISD::MUL, dl, VT, Res, Factor); +} + +SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, + SmallVectorImpl<SDNode *> &Created) const { + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isIntDivCheap(N->getValueType(0), Attr)) + return SDValue(N, 0); // Lower SDIV as SDIV + return SDValue(); +} + +/// Given an ISD::SDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". +SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, + bool IsAfterLegalization, + SmallVectorImpl<SDNode *> &Created) const { + SDLoc dl(N); + EVT VT = N->getValueType(0); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); + unsigned EltBits = VT.getScalarSizeInBits(); + + // Check to see if we can do this. + // FIXME: We should be more aggressive here. + if (!isTypeLegal(VT)) + return SDValue(); + + // If the sdiv has an 'exact' bit we can use a simpler lowering. + if (N->getFlags().hasExact()) + return BuildExactSDIV(*this, N, dl, DAG, Created); + + SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks; + + auto BuildSDIVPattern = [&](ConstantSDNode *C) { + if (C->isNullValue()) + return false; + + const APInt &Divisor = C->getAPIntValue(); + APInt::ms magics = Divisor.magic(); + int NumeratorFactor = 0; + int ShiftMask = -1; + + if (Divisor.isOneValue() || Divisor.isAllOnesValue()) { + // If d is +1/-1, we just multiply the numerator by +1/-1. 
+ NumeratorFactor = Divisor.getSExtValue(); + magics.m = 0; + magics.s = 0; + ShiftMask = 0; + } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { + // If d > 0 and m < 0, add the numerator. + NumeratorFactor = 1; + } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) { + // If d < 0 and m > 0, subtract the numerator. + NumeratorFactor = -1; + } + + MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT)); + Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT)); + Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT)); + ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT)); + return true; + }; + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Collect the shifts / magic values from each element. + if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern)) + return SDValue(); + + SDValue MagicFactor, Factor, Shift, ShiftMask; + if (VT.isVector()) { + MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors); + Factor = DAG.getBuildVector(VT, dl, Factors); + Shift = DAG.getBuildVector(ShVT, dl, Shifts); + ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks); + } else { + MagicFactor = MagicFactors[0]; + Factor = Factors[0]; + Shift = Shifts[0]; + ShiftMask = ShiftMasks[0]; + } + + // Multiply the numerator (operand 0) by the magic value. + // FIXME: We should support doing a MUL in a wider type. + SDValue Q; + if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) + : isOperationLegalOrCustom(ISD::MULHS, VT)) + Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor); + else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) + : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) { + SDValue LoHi = + DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor); + Q = SDValue(LoHi.getNode(), 1); + } else + return SDValue(); // No mulhs or equivalent. + Created.push_back(Q.getNode()); + + // (Optionally) Add/subtract the numerator using Factor. + Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor); + Created.push_back(Factor.getNode()); + Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor); + Created.push_back(Q.getNode()); + + // Shift right algebraic by shift value. + Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift); + Created.push_back(Q.getNode()); + + // Extract the sign bit, mask it and add it to the quotient. + SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT); + SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift); + Created.push_back(T.getNode()); + T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask); + Created.push_back(T.getNode()); + return DAG.getNode(ISD::ADD, dl, VT, Q, T); +} + +/// Given an ISD::UDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". +SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, + bool IsAfterLegalization, + SmallVectorImpl<SDNode *> &Created) const { + SDLoc dl(N); + EVT VT = N->getValueType(0); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); + unsigned EltBits = VT.getScalarSizeInBits(); + + // Check to see if we can do this. + // FIXME: We should be more aggressive here. 
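+  // For reference, i32 udiv by 7 builds the classic magic-number sequence:
+  //   Q = mulhu(N0, 0x24924925); NPQ = srl(sub(N0, Q), 1);
+  //   result = srl(add(NPQ, Q), 2).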
+  if (!isTypeLegal(VT))
+    return SDValue();
+
+  bool UseNPQ = false;
+  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
+
+  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
+    if (C->isNullValue())
+      return false;
+    // FIXME: We should use a narrower constant when the upper
+    // bits are known to be zero.
+    APInt Divisor = C->getAPIntValue();
+    APInt::mu magics = Divisor.magicu();
+    unsigned PreShift = 0, PostShift = 0;
+
+    // If the divisor is even, we can avoid using the expensive fixup by
+    // shifting the divided value upfront.
+    if (magics.a != 0 && !Divisor[0]) {
+      PreShift = Divisor.countTrailingZeros();
+      // Get magic number for the shifted divisor.
+      magics = Divisor.lshr(PreShift).magicu(PreShift);
+      assert(magics.a == 0 && "Should use cheap fixup now");
+    }
+
+    APInt Magic = magics.m;
+
+    bool SelNPQ;
+    if (magics.a == 0 || Divisor.isOneValue()) {
+      assert(magics.s < Divisor.getBitWidth() &&
+             "We shouldn't generate an undefined shift!");
+      PostShift = magics.s;
+      SelNPQ = false;
+    } else {
+      PostShift = magics.s - 1;
+      SelNPQ = true;
+    }
+
+    PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
+    MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
+    NPQFactors.push_back(
+        DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
+                               : APInt::getNullValue(EltBits),
+                        dl, SVT));
+    PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
+    UseNPQ |= SelNPQ;
+    return true;
+  };
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // Collect the shifts/magic values from each element.
+  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
+    return SDValue();
+
+  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
+  if (VT.isVector()) {
+    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
+    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
+    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
+    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
+  } else {
+    PreShift = PreShifts[0];
+    MagicFactor = MagicFactors[0];
+    PostShift = PostShifts[0];
+  }
+
+  SDValue Q = N0;
+  Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
+  Created.push_back(Q.getNode());
+
+  // FIXME: We should support doing a MUL in a wider type.
+  auto GetMULHU = [&](SDValue X, SDValue Y) {
+    if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
+                            : isOperationLegalOrCustom(ISD::MULHU, VT))
+      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
+    if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
+                            : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
+      SDValue LoHi =
+          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
+      return SDValue(LoHi.getNode(), 1);
+    }
+    return SDValue(); // No mulhu or equivalent
+  };
+
+  // Multiply the numerator (operand 0) by the magic value.
+  Q = GetMULHU(Q, MagicFactor);
+  if (!Q)
+    return SDValue();
+
+  Created.push_back(Q.getNode());
+
+  if (UseNPQ) {
+    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
+    Created.push_back(NPQ.getNode());
+
+    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
+    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
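+    // (mulhu X, 2^(W-1)) computes floor(X * 2^(W-1) / 2^W) = X >> 1, so a
+    // top-bit-only NPQ factor performs the shift while an all-zero factor
+    // yields zero for lanes that need no fixup.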
+ if (VT.isVector()) + NPQ = GetMULHU(NPQ, NPQFactor); + else + NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT)); + + Created.push_back(NPQ.getNode()); + + Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); + Created.push_back(Q.getNode()); + } + + Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift); + Created.push_back(Q.getNode()); + + SDValue One = DAG.getConstant(1, dl, VT); + SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ); + return DAG.getSelect(dl, VT, IsOne, N0, Q); +} + +/// If all values in Values that *don't* match the predicate are same 'splat' +/// value, then replace all values with that splat value. +/// Else, if AlternativeReplacement was provided, then replace all values that +/// do match predicate with AlternativeReplacement value. +static void +turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values, + std::function<bool(SDValue)> Predicate, + SDValue AlternativeReplacement = SDValue()) { + SDValue Replacement; + // Is there a value for which the Predicate does *NOT* match? What is it? + auto SplatValue = llvm::find_if_not(Values, Predicate); + if (SplatValue != Values.end()) { + // Does Values consist only of SplatValue's and values matching Predicate? + if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) { + return Value == *SplatValue || Predicate(Value); + })) // Then we shall replace values matching predicate with SplatValue. + Replacement = *SplatValue; + } + if (!Replacement) { + // Oops, we did not find the "baseline" splat value. + if (!AlternativeReplacement) + return; // Nothing to do. + // Let's replace with provided value then. + Replacement = AlternativeReplacement; + } + std::replace_if(Values.begin(), Values.end(), Predicate, Replacement); +} + +/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE +/// where the divisor is constant and the comparison target is zero, +/// return a DAG expression that will generate the same comparison result +/// using only multiplications, additions and shifts/rotations. +/// Ref: "Hacker's Delight" 10-17. +SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, + ISD::CondCode Cond, + DAGCombinerInfo &DCI, + const SDLoc &DL) const { + SmallVector<SDNode *, 2> Built; + if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond, + DCI, DL, Built)) { + for (SDNode *N : Built) + DCI.AddToWorklist(N); + return Folded; + } + + return SDValue(); +} + +SDValue +TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL, + SmallVectorImpl<SDNode *> &Created) const { + // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q) + // - D must be constant, with D = D0 * 2^K where D0 is odd + // - P is the multiplicative inverse of D0 modulo 2^W + // - Q = floor(((2^W) - 1) / D) + // where W is the width of the common type of N and D. + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + "Only applicable for (in)equality comparisons."); + + SelectionDAG &DAG = DCI.DAG; + + EVT VT = REMNode.getValueType(); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); + + // If MUL is unavailable, we cannot proceed in any case. + if (!isOperationLegalOrCustom(ISD::MUL, VT)) + return SDValue(); + + // TODO: Could support comparing with non-zero too. 
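+  // Worked example for i32 (seteq (urem N, 3), 0): D0 = 3, K = 0,
+  // P = 0xAAAAAAAB (the inverse of 3 modulo 2^32), Q = 0x55555555, so the
+  // fold emits (setule (mul N, 0xAAAAAAAB), 0x55555555).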
+ ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); + if (!CompTarget || !CompTarget->isNullValue()) + return SDValue(); + + bool HadOneDivisor = false; + bool AllDivisorsAreOnes = true; + bool HadEvenDivisor = false; + bool AllDivisorsArePowerOfTwo = true; + SmallVector<SDValue, 16> PAmts, KAmts, QAmts; + + auto BuildUREMPattern = [&](ConstantSDNode *C) { + // Division by 0 is UB. Leave it to be constant-folded elsewhere. + if (C->isNullValue()) + return false; + + const APInt &D = C->getAPIntValue(); + // If all divisors are ones, we will prefer to avoid the fold. + HadOneDivisor |= D.isOneValue(); + AllDivisorsAreOnes &= D.isOneValue(); + + // Decompose D into D0 * 2^K + unsigned K = D.countTrailingZeros(); + assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + APInt D0 = D.lshr(K); + + // D is even if it has trailing zeros. + HadEvenDivisor |= (K != 0); + // D is a power-of-two if D0 is one. + // If all divisors are power-of-two, we will prefer to avoid the fold. + AllDivisorsArePowerOfTwo &= D0.isOneValue(); + + // P = inv(D0, 2^W) + // 2^W requires W + 1 bits, so we have to extend and then truncate. + unsigned W = D.getBitWidth(); + APInt P = D0.zext(W + 1) + .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) + .trunc(W); + assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + + // Q = floor((2^W - 1) / D) + APInt Q = APInt::getAllOnesValue(W).udiv(D); + + assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + "We are expecting that K is always less than all-ones for ShSVT"); + + // If the divisor is 1 the result can be constant-folded. + if (D.isOneValue()) { + // Set P and K amount to a bogus values so we can try to splat them. + P = 0; + K = -1; + assert(Q.isAllOnesValue() && + "Expecting all-ones comparison for one divisor"); + } + + PAmts.push_back(DAG.getConstant(P, DL, SVT)); + KAmts.push_back( + DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT)); + QAmts.push_back(DAG.getConstant(Q, DL, SVT)); + return true; + }; + + SDValue N = REMNode.getOperand(0); + SDValue D = REMNode.getOperand(1); + + // Collect the values from each element. + if (!ISD::matchUnaryPredicate(D, BuildUREMPattern)) + return SDValue(); + + // If this is a urem by a one, avoid the fold since it can be constant-folded. + if (AllDivisorsAreOnes) + return SDValue(); + + // If this is a urem by a powers-of-two, avoid the fold since it can be + // best implemented as a bit test. + if (AllDivisorsArePowerOfTwo) + return SDValue(); + + SDValue PVal, KVal, QVal; + if (VT.isVector()) { + if (HadOneDivisor) { + // Try to turn PAmts into a splat, since we don't care about the values + // that are currently '0'. If we can't, just keep '0'`s. + turnVectorIntoSplatVector(PAmts, isNullConstant); + // Try to turn KAmts into a splat, since we don't care about the values + // that are currently '-1'. If we can't, change them to '0'`s. + turnVectorIntoSplatVector(KAmts, isAllOnesConstant, + DAG.getConstant(0, DL, ShSVT)); + } + + PVal = DAG.getBuildVector(VT, DL, PAmts); + KVal = DAG.getBuildVector(ShVT, DL, KAmts); + QVal = DAG.getBuildVector(VT, DL, QAmts); + } else { + PVal = PAmts[0]; + KVal = KAmts[0]; + QVal = QAmts[0]; + } + + // (mul N, P) + SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); + Created.push_back(Op0.getNode()); + + // Rotate right only if any divisor was even. 
We avoid rotates for all-odd + // divisors as a performance improvement, since rotating by 0 is a no-op. + if (HadEvenDivisor) { + // We need ROTR to do this. + if (!isOperationLegalOrCustom(ISD::ROTR, VT)) + return SDValue(); + SDNodeFlags Flags; + Flags.setExact(true); + // UREM: (rotr (mul N, P), K) + Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags); + Created.push_back(Op0.getNode()); + } + + // UREM: (setule/setugt (rotr (mul N, P), K), Q) + return DAG.getSetCC(DL, SETCCVT, Op0, QVal, + ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); +} + +/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE +/// where the divisor is constant and the comparison target is zero, +/// return a DAG expression that will generate the same comparison result +/// using only multiplications, additions and shifts/rotations. +/// Ref: "Hacker's Delight" 10-17. +SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, + ISD::CondCode Cond, + DAGCombinerInfo &DCI, + const SDLoc &DL) const { + SmallVector<SDNode *, 7> Built; + if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond, + DCI, DL, Built)) { + assert(Built.size() <= 7 && "Max size prediction failed."); + for (SDNode *N : Built) + DCI.AddToWorklist(N); + return Folded; + } + + return SDValue(); +} + +SDValue +TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL, + SmallVectorImpl<SDNode *> &Created) const { + // Fold: + // (seteq/ne (srem N, D), 0) + // To: + // (setule/ugt (rotr (add (mul N, P), A), K), Q) + // + // - D must be constant, with D = D0 * 2^K where D0 is odd + // - P is the multiplicative inverse of D0 modulo 2^W + // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k))) + // - Q = floor((2 * A) / (2^K)) + // where W is the width of the common type of N and D. + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + "Only applicable for (in)equality comparisons."); + + SelectionDAG &DAG = DCI.DAG; + + EVT VT = REMNode.getValueType(); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); + + // If MUL is unavailable, we cannot proceed in any case. + if (!isOperationLegalOrCustom(ISD::MUL, VT)) + return SDValue(); + + // TODO: Could support comparing with non-zero too. + ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); + if (!CompTarget || !CompTarget->isNullValue()) + return SDValue(); + + bool HadIntMinDivisor = false; + bool HadOneDivisor = false; + bool AllDivisorsAreOnes = true; + bool HadEvenDivisor = false; + bool NeedToApplyOffset = false; + bool AllDivisorsArePowerOfTwo = true; + SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts; + + auto BuildSREMPattern = [&](ConstantSDNode *C) { + // Division by 0 is UB. Leave it to be constant-folded elsewhere. + if (C->isNullValue()) + return false; + + // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine. + + // WARNING: this fold is only valid for positive divisors! + APInt D = C->getAPIntValue(); + if (D.isNegative()) + D.negate(); // `rem %X, -C` is equivalent to `rem %X, C` + + HadIntMinDivisor |= D.isMinSignedValue(); + + // If all divisors are ones, we will prefer to avoid the fold. 
+ HadOneDivisor |= D.isOneValue();
+ AllDivisorsAreOnes &= D.isOneValue();
+
+ // Decompose D into D0 * 2^K
+ unsigned K = D.countTrailingZeros();
+ assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ APInt D0 = D.lshr(K);
+
+ if (!D.isMinSignedValue()) {
+ // D is even if it has trailing zeros; unless it's INT_MIN, in which case
+ // we don't care about this lane in this fold, we'll special-handle it.
+ HadEvenDivisor |= (K != 0);
+ }
+
+ // D is a power-of-two if D0 is one. This includes INT_MIN.
+ // If all divisors are power-of-two, we will prefer to avoid the fold.
+ AllDivisorsArePowerOfTwo &= D0.isOneValue();
+
+ // P = inv(D0, 2^W)
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = D.getBitWidth();
+ APInt P = D0.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+
+ // A = floor((2^(W - 1) - 1) / D0) & -2^K
+ APInt A = APInt::getSignedMaxValue(W).udiv(D0);
+ A.clearLowBits(K);
+
+ if (!D.isMinSignedValue()) {
+ // If the divisor is INT_MIN we don't care about this lane in this fold;
+ // it will be special-handled below.
+ NeedToApplyOffset |= A != 0;
+ }
+
+ // Q = floor((2 * A) / (2^K))
+ APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
+
+ assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
+ "We are expecting that A is always less than all-ones for SVT");
+ assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ "We are expecting that K is always less than all-ones for ShSVT");
+
+ // If the divisor is 1 the result can be constant-folded. Likewise, we
+ // don't care about INT_MIN lanes, those can be set to undef if appropriate.
+ if (D.isOneValue()) {
+ // Set P, A and K to bogus values so we can try to splat them.
+ P = 0;
+ A = -1;
+ K = -1;
+
+ // x ?% 1 == 0 <--> true <--> x u<= -1
+ Q = -1;
+ }
+
+ PAmts.push_back(DAG.getConstant(P, DL, SVT));
+ AAmts.push_back(DAG.getConstant(A, DL, SVT));
+ KAmts.push_back(
+ DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
+ QAmts.push_back(DAG.getConstant(Q, DL, SVT));
+ return true;
+ };
+
+ SDValue N = REMNode.getOperand(0);
+ SDValue D = REMNode.getOperand(1);
+
+ // Collect the values from each element.
+ if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
+ return SDValue();
+
+ // If this is a srem by one, avoid the fold since it can be constant-folded.
+ if (AllDivisorsAreOnes)
+ return SDValue();
+
+ // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
+ // since it can be best implemented as a bit test.
+ if (AllDivisorsArePowerOfTwo)
+ return SDValue();
+
+ SDValue PVal, AVal, KVal, QVal;
+ if (VT.isVector()) {
+ if (HadOneDivisor) {
+ // Try to turn PAmts into a splat, since we don't care about the values
+ // that are currently '0'. If we can't, just keep '0's.
+ turnVectorIntoSplatVector(PAmts, isNullConstant);
+ // Try to turn AAmts into a splat, since we don't care about the
+ // values that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, SVT));
+ // Try to turn KAmts into a splat, since we don't care about the values
+ // that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, ShSVT));
+ }
+
+ PVal = DAG.getBuildVector(VT, DL, PAmts);
+ AVal = DAG.getBuildVector(VT, DL, AAmts);
+ KVal = DAG.getBuildVector(ShVT, DL, KAmts);
+ QVal = DAG.getBuildVector(VT, DL, QAmts);
+ } else {
+ PVal = PAmts[0];
+ AVal = AAmts[0];
+ KVal = KAmts[0];
+ QVal = QAmts[0];
+ }
+
+ // (mul N, P)
+ SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
+ Created.push_back(Op0.getNode());
+
+ if (NeedToApplyOffset) {
+ // We need ADD to do this.
+ if (!isOperationLegalOrCustom(ISD::ADD, VT))
+ return SDValue();
+
+ // (add (mul N, P), A)
+ Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
+ Created.push_back(Op0.getNode());
+ }
+
+ // Rotate right only if any divisor was even. We avoid rotates for all-odd
+ // divisors as a performance improvement, since rotating by 0 is a no-op.
+ if (HadEvenDivisor) {
+ // We need ROTR to do this.
+ if (!isOperationLegalOrCustom(ISD::ROTR, VT))
+ return SDValue();
+ SDNodeFlags Flags;
+ Flags.setExact(true);
+ // SREM: (rotr (add (mul N, P), A), K)
+ Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
+ Created.push_back(Op0.getNode());
+ }
+
+ // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
+ SDValue Fold =
+ DAG.getSetCC(DL, SETCCVT, Op0, QVal,
+ ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+
+ // If we didn't have lanes with INT_MIN divisor, then we're done.
+ if (!HadIntMinDivisor)
+ return Fold;
+
+ // That fold is only valid for positive divisors, which effectively means it
+ // is invalid for INT_MIN divisors. So if we have such a lane, we must fix
+ // up the results for those lanes.
+ assert(VT.isVector() && "Can/should only get here for vectors.");
+
+ if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
+ !isOperationLegalOrCustom(ISD::AND, VT) ||
+ !isOperationLegalOrCustom(Cond, VT) ||
+ !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return SDValue();
+
+ Created.push_back(Fold.getNode());
+
+ SDValue IntMin = DAG.getConstant(
+ APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
+ SDValue IntMax = DAG.getConstant(
+ APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
+ SDValue Zero =
+ DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
+
+ // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
+ SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
+ Created.push_back(DivisorIsIntMin.getNode());
+
+ // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
+ SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
+ Created.push_back(Masked.getNode());
+ SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
+ Created.push_back(MaskedIsZero.getNode());
+
+ // To produce the final result we need to blend 2 vectors: 'Fold' and
+ // 'MaskedIsZero'. If the divisor for a channel was *NOT* INT_MIN, we pick
+ // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
+ // constant-folded, select can get lowered to a shuffle with constant mask.
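+ // (For illustration: the fix-up relies on the identity
+ //   n s% INT_MIN == 0  <=>  (n & INT_MAX) == 0;
+ // e.g. for i8, n s% -128 == 0 exactly when the low 7 bits of n are zero.)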
+ SDValue Blended = + DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold); + + return Blended; +} + +bool TargetLowering:: +verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { + if (!isa<ConstantSDNode>(Op.getOperand(0))) { + DAG.getContext()->emitError("argument to '__builtin_return_address' must " + "be a constant integer"); + return true; + } + + return false; +} + +char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, bool ForCodeSize, + unsigned Depth) const { + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) + return 2; + + // Don't allow anything with multiple uses unless we know it is free. + EVT VT = Op.getValueType(); + const SDNodeFlags Flags = Op->getFlags(); + const TargetOptions &Options = DAG.getTarget().Options; + if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND && + isFPExtFree(VT, Op.getOperand(0).getValueType()))) + return 0; + + // Don't recurse exponentially. + if (Depth > SelectionDAG::MaxRecursionDepth) + return 0; + + switch (Op.getOpcode()) { + case ISD::ConstantFP: { + if (!LegalOperations) + return 1; + + // Don't invert constant FP values after legalization unless the target says + // the negated constant is legal. + return isOperationLegal(ISD::ConstantFP, VT) || + isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT, + ForCodeSize); + } + case ISD::BUILD_VECTOR: { + // Only permit BUILD_VECTOR of constants. + if (llvm::any_of(Op->op_values(), [&](SDValue N) { + return !N.isUndef() && !isa<ConstantFPSDNode>(N); + })) + return 0; + if (!LegalOperations) + return 1; + if (isOperationLegal(ISD::ConstantFP, VT) && + isOperationLegal(ISD::BUILD_VECTOR, VT)) + return 1; + return llvm::all_of(Op->op_values(), [&](SDValue N) { + return N.isUndef() || + isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT, + ForCodeSize); + }); + } + case ISD::FADD: + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // After operation legalization, it might not be legal to create new FSUBs. + if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT)) + return 0; + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1)) + return V; + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return 1; + + case ISD::FMUL: + case ISD::FDIV: + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) + if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1)) + return V; + + // Ignore X * 2.0 because that is expected to be canonicalized to X + X. 
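+ // (The same identities hold for FDIV: -(X / Y) == (-X) / Y == X / (-Y),
+ // which is why this case handles both opcodes; it suffices for either
+ // operand to be cheaply negatible.)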
+ if (auto *C = isConstOrConstSplatFP(Op.getOperand(1))) + if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL) + return 0; + + return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + + case ISD::FMA: + case ISD::FMAD: { + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (!V2) + return 0; + + // One of Op0/Op1 must be cheaply negatible, then select the cheapest. + char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V01 = std::max(V0, V1); + return V01 ? std::max(V01, V2) : 0; + } + + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FSIN: + return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + } + + return 0; +} + +SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, + bool ForCodeSize, + unsigned Depth) const { + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) + return Op.getOperand(0); + + assert(Depth <= SelectionDAG::MaxRecursionDepth && + "getNegatedExpression doesn't match isNegatibleForFree"); + const SDNodeFlags Flags = Op->getFlags(); + + switch (Op.getOpcode()) { + case ISD::ConstantFP: { + APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); + V.changeSign(); + return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); + } + case ISD::BUILD_VECTOR: { + SmallVector<SDValue, 4> Ops; + for (SDValue C : Op->op_values()) { + if (C.isUndef()) { + Ops.push_back(C); + continue; + } + APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF(); + V.changeSign(); + Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType())); + } + return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); + } + case ISD::FADD: + assert((DAG.getTarget().Options.NoSignedZerosFPMath || + Flags.hasNoSignedZeros()) && + "Expected NSZ fp-flag"); + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, + Depth + 1)) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1), Flags); + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(0), Flags); + case ISD::FSUB: + // fold (fneg (fsub 0, B)) -> B + if (ConstantFPSDNode *N0CFP = + isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true)) + if (N0CFP->isZero()) + return Op.getOperand(1); + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(0), Flags); + + case ISD::FMUL: + case ISD::FDIV: + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) + if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, + Depth + 1)) + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1), Flags); + + // fold (fneg (fmul X, Y)) -> 
(fmul X, (fneg Y)) + return DAG.getNode( + Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), + getNegatedExpression(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1), + Flags); + + case ISD::FMA: + case ISD::FMAD: { + assert((DAG.getTarget().Options.NoSignedZerosFPMath || + Flags.hasNoSignedZeros()) && + "Expected NSZ fp-flag"); + + SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations, + ForCodeSize, Depth + 1); + + char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (V0 >= V1) { + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + SDValue Neg0 = getNegatedExpression( + Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0, + Op.getOperand(1), Neg2, Flags); + } + + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + Op.getOperand(0), Neg1, Neg2, Flags); + } + + case ISD::FP_EXTEND: + case ISD::FSIN: + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1)); + case ISD::FP_ROUND: + return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1)); + } + + llvm_unreachable("Unknown code"); +} + +//===----------------------------------------------------------------------===// +// Legalization Utilities +//===----------------------------------------------------------------------===// + +bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, + SDValue LHS, SDValue RHS, + SmallVectorImpl<SDValue> &Result, + EVT HiLoVT, SelectionDAG &DAG, + MulExpansionKind Kind, SDValue LL, + SDValue LH, SDValue RL, SDValue RH) const { + assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI || + Opcode == ISD::SMUL_LOHI); + + bool HasMULHS = (Kind == MulExpansionKind::Always) || + isOperationLegalOrCustom(ISD::MULHS, HiLoVT); + bool HasMULHU = (Kind == MulExpansionKind::Always) || + isOperationLegalOrCustom(ISD::MULHU, HiLoVT); + bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) || + isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT); + bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) || + isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT); + + if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI) + return false; + + unsigned OuterBitSize = VT.getScalarSizeInBits(); + unsigned InnerBitSize = HiLoVT.getScalarSizeInBits(); + unsigned LHSSB = DAG.ComputeNumSignBits(LHS); + unsigned RHSSB = DAG.ComputeNumSignBits(RHS); + + // LL, LH, RL, and RH must be either all NULL or all set to a value. + assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) || + (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode())); + + SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT); + auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi, + bool Signed) -> bool { + if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) { + Lo = DAG.getNode(Signed ? 
ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R); + Hi = SDValue(Lo.getNode(), 1); + return true; + } + if ((Signed && HasMULHS) || (!Signed && HasMULHU)) { + Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R); + Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R); + return true; + } + return false; + }; + + SDValue Lo, Hi; + + if (!LL.getNode() && !RL.getNode() && + isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS); + RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS); + } + + if (!LL.getNode()) + return false; + + APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); + if (DAG.MaskedValueIsZero(LHS, HighMask) && + DAG.MaskedValueIsZero(RHS, HighMask)) { + // The inputs are both zero-extended. + if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) { + Result.push_back(Lo); + Result.push_back(Hi); + if (Opcode != ISD::MUL) { + SDValue Zero = DAG.getConstant(0, dl, HiLoVT); + Result.push_back(Zero); + Result.push_back(Zero); + } + return true; + } + } + + if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize && + RHSSB > InnerBitSize) { + // The input values are both sign-extended. + // TODO non-MUL case? + if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) { + Result.push_back(Lo); + Result.push_back(Hi); + return true; + } + } + + unsigned ShiftAmount = OuterBitSize - InnerBitSize; + EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout()); + if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) { + // FIXME getShiftAmountTy does not always return a sensible result when VT + // is an illegal type, and so the type may be too small to fit the shift + // amount. Override it with i32. The shift will have to be legalized. + ShiftAmountTy = MVT::i32; + } + SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy); + + if (!LH.getNode() && !RH.getNode() && + isOperationLegalOrCustom(ISD::SRL, VT) && + isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift); + LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH); + RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift); + RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH); + } + + if (!LH.getNode()) + return false; + + if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false)) + return false; + + Result.push_back(Lo); + + if (Opcode == ISD::MUL) { + RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); + Result.push_back(Hi); + return true; + } + + // Compute the full width result. + auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue { + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); + Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi); + Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); + return DAG.getNode(ISD::OR, dl, VT, Lo, Hi); + }; + + SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi); + if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false)) + return false; + + // This is effectively the add part of a multiply-add of half-sized operands, + // so it cannot overflow. 
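+ // (Illustrative: with b = 2^InnerBitSize this is schoolbook multiplication,
+ //   (LH*b + LL) * (RH*b + RL)
+ //     = LH*RH*b^2 + (LH*RL + LL*RH)*b + LL*RL,
+ // and the code below accumulates the partial products column by column,
+ // carrying between halves.)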
+ Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi)); + + if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false)) + return false; + + SDValue Zero = DAG.getConstant(0, dl, HiLoVT); + EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + + bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) && + isOperationLegalOrCustom(ISD::ADDE, VT)); + if (UseGlue) + Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next, + Merge(Lo, Hi)); + else + Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next, + Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType)); + + SDValue Carry = Next.getValue(1); + Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next)); + Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift); + + if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI)) + return false; + + if (UseGlue) + Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero, + Carry); + else + Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi, + Zero, Carry); + + Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi)); + + if (Opcode == ISD::SMUL_LOHI) { + SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next, + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL)); + Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT); + + NextSub = DAG.getNode(ISD::SUB, dl, VT, Next, + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL)); + Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT); + } + + Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next)); + Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift); + Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next)); + return true; +} + +bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, + SelectionDAG &DAG, MulExpansionKind Kind, + SDValue LL, SDValue LH, SDValue RL, + SDValue RH) const { + SmallVector<SDValue, 2> Result; + bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N, + N->getOperand(0), N->getOperand(1), Result, HiLoVT, + DAG, Kind, LL, LH, RL, RH); + if (Ok) { + assert(Result.size() == 2); + Lo = Result[0]; + Hi = Result[1]; + } + return Ok; +} + +bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + EVT VT = Node->getValueType(0); + + if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) || + !isOperationLegalOrCustom(ISD::SRL, VT) || + !isOperationLegalOrCustom(ISD::SUB, VT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, VT))) + return false; + + // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) + // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) + SDValue X = Node->getOperand(0); + SDValue Y = Node->getOperand(1); + SDValue Z = Node->getOperand(2); + + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + bool IsFSHL = Node->getOpcode() == ISD::FSHL; + SDLoc DL(SDValue(Node, 0)); + + EVT ShVT = Z.getValueType(); + SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT); + SDValue Zero = DAG.getConstant(0, DL, ShVT); + + SDValue ShAmt; + if (isPowerOf2_32(EltSizeInBits)) { + SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT); + ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask); + } else { + ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC); + } + + SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt); + SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt); + SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? 
InvShAmt : ShAmt); + SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY); + + // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth, + // and that is undefined. We must compare and select to avoid UB. + EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT); + + // For fshl, 0-shift returns the 1st arg (X). + // For fshr, 0-shift returns the 2nd arg (Y). + SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ); + Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or); + return true; +} + +// TODO: Merge with expandFunnelShift. +bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + EVT VT = Node->getValueType(0); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + bool IsLeft = Node->getOpcode() == ISD::ROTL; + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDLoc DL(SDValue(Node, 0)); + + EVT ShVT = Op1.getValueType(); + SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT); + + // If a rotate in the other direction is legal, use it. + unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL; + if (isOperationLegal(RevRot, VT)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1); + Result = DAG.getNode(RevRot, DL, VT, Op0, Sub); + return true; + } + + if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) || + !isOperationLegalOrCustom(ISD::SRL, VT) || + !isOperationLegalOrCustom(ISD::SUB, VT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, VT) || + !isOperationLegalOrCustomOrPromote(ISD::AND, VT))) + return false; + + // Otherwise, + // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1))) + // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1))) + // + assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 && + "Expecting the type bitwidth to be a power of 2"); + unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL; + unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL; + SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT); + SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1); + SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC); + SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC); + Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0), + DAG.getNode(HsOpc, DL, VT, Op0, And1)); + return true; +} + +bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Node->getValueType(0); + SDLoc dl(SDValue(Node, 0)); + + // FIXME: Only f32 to i64 conversions are supported. + if (SrcVT != MVT::f32 || DstVT != MVT::i64) + return false; + + if (Node->isStrictFPOpcode()) + // When a NaN is converted to an integer a trap is allowed. We can't + // use this expansion here because it would eliminate that trap. Other + // traps are also allowed and cannot be eliminated. See + // IEEE 754-2008 sec 5.8. 
+ return false;
+
+ // Expand f32 -> i64 conversion
+ // This algorithm comes from compiler-rt's implementation of fixsfdi:
+ // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
+ unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
+ EVT IntVT = SrcVT.changeTypeToInteger();
+ EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
+
+ SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
+ SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
+ SDValue Bias = DAG.getConstant(127, dl, IntVT);
+ SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
+ SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
+ SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
+
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
+
+ SDValue ExponentBits = DAG.getNode(
+ ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
+ DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
+ SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
+
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
+ DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
+ Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
+
+ SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
+ DAG.getConstant(0x00800000, dl, IntVT));
+
+ R = DAG.getZExtOrTrunc(R, dl, DstVT);
+
+ R = DAG.getSelectCC(
+ dl, Exponent, ExponentLoBit,
+ DAG.getNode(ISD::SHL, dl, DstVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
+ dl, IntShVT)),
+ DAG.getNode(ISD::SRL, dl, DstVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
+ dl, IntShVT)),
+ ISD::SETGT);
+
+ SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
+ DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
+
+ Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
+ DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
+ return true;
+}
+
+bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
+ SDValue &Chain,
+ SelectionDAG &DAG) const {
+ SDLoc dl(SDValue(Node, 0));
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
+
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
+ ISD::FP_TO_SINT;
+ if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
+ return false;
+
+ // If the maximum float value is smaller than the signed integer range,
+ // the destination signmask can't be represented by the float, so we can
+ // just use FP_TO_SINT directly.
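+ // (A scalar model of the overall expansion, illustrative only, for
+ // double -> uint64_t with Cst == 2^63:
+ //   uint64_t fp_to_uint(double x) {
+ //     if (x < 0x1p63)
+ //       return (uint64_t)(int64_t)x;          // fits in the signed range
+ //     return (uint64_t)(int64_t)(x - 0x1p63)  // bias into signed range...
+ //            ^ (1ull << 63);                  // ...then restore the top bit
+ //   }
+ // )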
+ const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT); + APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits())); + APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits()); + if (APFloat::opOverflow & + APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) { + if (Node->isStrictFPOpcode()) { + Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + { Node->getOperand(0), Src }); + Chain = Result.getValue(1); + } else + Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); + return true; + } + + SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); + SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); + + bool Strict = Node->isStrictFPOpcode() || + shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); + + if (Strict) { + // Expand based on maximum range of FP_TO_SINT, if the value exceeds the + // signmask then offset (the result of which should be fully representable). + // Sel = Src < 0x8000000000000000 + // Val = select Sel, Src, Src - 0x8000000000000000 + // Ofs = select Sel, 0, 0x8000000000000000 + // Result = fp_to_sint(Val) ^ Ofs + + // TODO: Should any fast-math-flags be set for the FSUB? + SDValue SrcBiased; + if (Node->isStrictFPOpcode()) + SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, + { Node->getOperand(0), Src, Cst }); + else + SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst); + SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased); + SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), + DAG.getConstant(SignMask, dl, DstVT)); + SDValue SInt; + if (Node->isStrictFPOpcode()) { + SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + { SrcBiased.getValue(1), Val }); + Chain = SInt.getValue(1); + } else + SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val); + Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs); + } else { + // Expand based on maximum range of FP_TO_SINT: + // True = fp_to_sint(Src) + // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000) + // Result = select (Src < 0x8000000000000000), True, False + + SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); + // TODO: Should any fast-math-flags be set for the FSUB? + SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, + DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); + False = DAG.getNode(ISD::XOR, dl, DstVT, False, + DAG.getConstant(SignMask, dl, DstVT)); + Result = DAG.getSelect(dl, DstVT, Sel, True, False); + } + return true; +} + +bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + SDValue Src = Node->getOperand(0); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Node->getValueType(0); + + if (SrcVT.getScalarType() != MVT::i64) + return false; + + SDLoc dl(SDValue(Node, 0)); + EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout()); + + if (DstVT.getScalarType() == MVT::f32) { + // Only expand vector types if we have the appropriate vector bit + // operations. + if (SrcVT.isVector() && + (!isOperationLegalOrCustom(ISD::SRL, SrcVT) || + !isOperationLegalOrCustom(ISD::FADD, DstVT) || + !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) || + !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT))) + return false; + + // For unsigned conversions, convert them to signed conversions using the + // algorithm from the x86_64 __floatundidf in compiler_rt. 
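+ // (Roughly, in scalar form; an illustrative sketch, not part of the
+ // lowering itself:
+ //   float u64_to_f32(uint64_t x) {
+ //     if ((int64_t)x >= 0)
+ //       return (float)(int64_t)x;         // the signed convert suffices
+ //     uint64_t half = (x >> 1) | (x & 1); // halve, keep the low "sticky" bit
+ //     return 2.0f * (float)(int64_t)half; // round once, then double
+ //   }
+ // )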
+ SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src); + + SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT); + SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst); + SDValue AndConst = DAG.getConstant(1, dl, SrcVT); + SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst); + SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr); + + SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or); + SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt); + + // TODO: This really should be implemented using a branch rather than a + // select. We happen to get lucky and machinesink does the right + // thing most of the time. This would be a good candidate for a + // pseudo-op, or, even better, for whole-function isel. + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); + + SDValue SignBitTest = DAG.getSetCC( + dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT); + Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast); + return true; + } + + if (DstVT.getScalarType() == MVT::f64) { + // Only expand vector types if we have the appropriate vector bit + // operations. + if (SrcVT.isVector() && + (!isOperationLegalOrCustom(ISD::SRL, SrcVT) || + !isOperationLegalOrCustom(ISD::FADD, DstVT) || + !isOperationLegalOrCustom(ISD::FSUB, DstVT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) || + !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT))) + return false; + + // Implementation of unsigned i64 to f64 following the algorithm in + // __floatundidf in compiler_rt. This implementation has the advantage + // of performing rounding correctly, both in the default rounding mode + // and in all alternate rounding modes. + SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT); + SDValue TwoP84PlusTwoP52 = DAG.getConstantFP( + BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT); + SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT); + SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT); + SDValue HiShift = DAG.getConstant(32, dl, ShiftVT); + + SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask); + SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift); + SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52); + SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84); + SDValue LoFlt = DAG.getBitcast(DstVT, LoOr); + SDValue HiFlt = DAG.getBitcast(DstVT, HiOr); + SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); + Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); + return true; + } + + return false; +} + +SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, + SelectionDAG &DAG) const { + SDLoc dl(Node); + unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ? + ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE; + EVT VT = Node->getValueType(0); + if (isOperationLegalOrCustom(NewOp, VT)) { + SDValue Quiet0 = Node->getOperand(0); + SDValue Quiet1 = Node->getOperand(1); + + if (!Node->getFlags().hasNoNaNs()) { + // Insert canonicalizes if it's possible we need to quiet to get correct + // sNaN behavior. 
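+ // (Background, IEEE-754 2008 semantics: minNum/maxNum return the other
+ // operand when one input is a quiet NaN, but a signaling NaN must first
+ // be quieted; FCANONICALIZE is inserted below to perform that quieting
+ // whenever an operand may be an sNaN.)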
+ if (!DAG.isKnownNeverSNaN(Quiet0)) { + Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0, + Node->getFlags()); + } + if (!DAG.isKnownNeverSNaN(Quiet1)) { + Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1, + Node->getFlags()); + } + } + + return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags()); + } + + // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that + // instead if there are no NaNs. + if (Node->getFlags().hasNoNaNs()) { + unsigned IEEE2018Op = + Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM; + if (isOperationLegalOrCustom(IEEE2018Op, VT)) { + return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0), + Node->getOperand(1), Node->getFlags()); + } + } + + return SDValue(); +} + +bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Op = Node->getOperand(0); + unsigned Len = VT.getScalarSizeInBits(); + assert(VT.isInteger() && "CTPOP not implemented for this type."); + + // TODO: Add support for irregular type lengths. + if (!(Len <= 128 && Len % 8 == 0)) + return false; + + // Only expand vector types if we have the appropriate vector bit operations. + if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) || + !isOperationLegalOrCustom(ISD::SUB, VT) || + !isOperationLegalOrCustom(ISD::SRL, VT) || + (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) || + !isOperationLegalOrCustomOrPromote(ISD::AND, VT))) + return false; + + // This is the "best" algorithm from + // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + SDValue Mask55 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT); + SDValue Mask33 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT); + SDValue Mask0F = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT); + SDValue Mask01 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); + + // v = v - ((v >> 1) & 0x55555555...) + Op = DAG.getNode(ISD::SUB, dl, VT, Op, + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(1, dl, ShVT)), + Mask55)); + // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) + Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33), + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(2, dl, ShVT)), + Mask33)); + // v = (v + (v >> 4)) & 0x0F0F0F0F... + Op = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ADD, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(4, dl, ShVT))), + Mask0F); + // v = (v * 0x01010101...) >> (Len - 8) + if (Len > 8) + Op = + DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), + DAG.getConstant(Len - 8, dl, ShVT)); + + Result = Op; + return true; +} + +bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Op = Node->getOperand(0); + unsigned NumBitsPerElt = VT.getScalarSizeInBits(); + + // If the non-ZERO_UNDEF version is supported we can use that instead. + if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF && + isOperationLegalOrCustom(ISD::CTLZ, VT)) { + Result = DAG.getNode(ISD::CTLZ, dl, VT, Op); + return true; + } + + // If the ZERO_UNDEF version is supported use that and handle the zero case. 
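+ // (In scalar form, the select built below is simply
+ //   unsigned ctlz(uint32_t x) { return x == 0 ? 32 : ctlz_zero_undef(x); }
+ // with ctlz_zero_undef a hypothetical helper that is free to misbehave on
+ // a zero input.)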
+ if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) { + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); + Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, + DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ); + return true; + } + + // Only expand vector types if we have the appropriate vector bit operations. + if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) || + !isOperationLegalOrCustom(ISD::CTPOP, VT) || + !isOperationLegalOrCustom(ISD::SRL, VT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, VT))) + return false; + + // for now, we do this: + // x = x | (x >> 1); + // x = x | (x >> 2); + // ... + // x = x | (x >>16); + // x = x | (x >>32); // for 64-bit input + // return popcount(~x); + // + // Ref: "Hacker's Delight" by Henry Warren + for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) { + SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT); + Op = DAG.getNode(ISD::OR, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, Tmp)); + } + Op = DAG.getNOT(dl, Op, VT); + Result = DAG.getNode(ISD::CTPOP, dl, VT, Op); + return true; +} + +bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + SDValue Op = Node->getOperand(0); + unsigned NumBitsPerElt = VT.getScalarSizeInBits(); + + // If the non-ZERO_UNDEF version is supported we can use that instead. + if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF && + isOperationLegalOrCustom(ISD::CTTZ, VT)) { + Result = DAG.getNode(ISD::CTTZ, dl, VT, Op); + return true; + } + + // If the ZERO_UNDEF version is supported use that and handle the zero case. + if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) { + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); + Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, + DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ); + return true; + } + + // Only expand vector types if we have the appropriate vector bit operations. + if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) || + (!isOperationLegalOrCustom(ISD::CTPOP, VT) && + !isOperationLegalOrCustom(ISD::CTLZ, VT)) || + !isOperationLegalOrCustom(ISD::SUB, VT) || + !isOperationLegalOrCustomOrPromote(ISD::AND, VT) || + !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) + return false; + + // for now, we use: { return popcount(~x & (x - 1)); } + // unless the target has ctlz but not ctpop, in which case we use: + // { return 32 - nlz(~x & (x-1)); } + // Ref: "Hacker's Delight" by Henry Warren + SDValue Tmp = DAG.getNode( + ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT), + DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT))); + + // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. 
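+ // (Illustrative: ~x & (x - 1) keeps exactly the trailing zero bits of x.
+ // For example x = 0b101000 gives ~x & (x - 1) = 0b000111; its popcount,
+ // or the bit width minus its ctlz, is the trailing-zero count, 3.)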
+ if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
+ Result =
+ DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
+ return true;
+ }
+
+ Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
+ return true;
+}
+
+bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = N->getOperand(0);
+
+ // Only expand vector types if we have the appropriate vector operations.
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
+ !isOperationLegalOrCustom(ISD::ADD, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
+ return false;
+
+ SDValue Shift =
+ DAG.getNode(ISD::SRA, dl, VT, Op,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
+ SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
+ Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
+ return true;
+}
+
+SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
+ SelectionDAG &DAG) const {
+ SDLoc SL(LD);
+ SDValue Chain = LD->getChain();
+ SDValue BasePTR = LD->getBasePtr();
+ EVT SrcVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ unsigned NumElem = SrcVT.getVectorNumElements();
+
+ EVT SrcEltVT = SrcVT.getScalarType();
+ EVT DstEltVT = LD->getValueType(0).getScalarType();
+
+ unsigned Stride = SrcEltVT.getSizeInBits() / 8;
+ assert(SrcEltVT.isByteSized());
+
+ SmallVector<SDValue, 8> Vals;
+ SmallVector<SDValue, 8> LoadChains;
+
+ for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+ SDValue ScalarLoad =
+ DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+
+ BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);
+
+ Vals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
+ SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
+
+ return DAG.getMergeValues({Value, NewChain}, SL);
+}
+
+SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
+ SelectionDAG &DAG) const {
+ SDLoc SL(ST);
+
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ EVT StVT = ST->getMemoryVT();
+
+ // The type of the data we want to save
+ EVT RegVT = Value.getValueType();
+ EVT RegSclVT = RegVT.getScalarType();
+
+ // The type of data as saved in memory.
+ EVT MemSclVT = StVT.getScalarType();
+
+ EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
+ unsigned NumElem = StVT.getVectorNumElements();
+
+ // A vector must always be stored in memory as-is, i.e. without any padding
+ // between the elements, since various parts of the code depend on it, e.g.
+ // in the handling of a bitcast of a vector type to int, which may be done
+ // with a vector store followed by an integer load. A vector that does not
+ // have elements that are byte-sized must therefore be stored as an integer
+ // built out of the extracted vector elements.
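+ // (For example, a v4i1 value <1,0,1,1> must occupy the low 4 bits of one
+ // integer, 0b1101 with element 0 in bit 0 on a little-endian layout, so
+ // the code below zero-extends each element and ORs it in at its bit index.)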
+ if (!MemSclVT.isByteSized()) { + unsigned NumBits = StVT.getSizeInBits(); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); + + SDValue CurrVal = DAG.getConstant(0, SL, IntVT); + + for (unsigned Idx = 0; Idx < NumElem; ++Idx) { + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, + DAG.getConstant(Idx, SL, IdxVT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt); + SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc); + unsigned ShiftIntoIdx = + (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx); + SDValue ShiftAmount = + DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT); + SDValue ShiftedElt = + DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount); + CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt); + } + + return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(), + ST->getAlignment(), ST->getMemOperand()->getFlags(), + ST->getAAInfo()); + } + + // Store Stride in bytes + unsigned Stride = MemSclVT.getSizeInBits() / 8; + assert(Stride && "Zero stride!"); + // Extract each of the elements from the original vector and save them into + // memory individually. + SmallVector<SDValue, 8> Stores; + for (unsigned Idx = 0; Idx < NumElem; ++Idx) { + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, + DAG.getConstant(Idx, SL, IdxVT)); + + SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride); + + // This scalar TruncStore may be illegal, but we legalize it later. + SDValue Store = DAG.getTruncStore( + Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride), + MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride), + ST->getMemOperand()->getFlags(), ST->getAAInfo()); + + Stores.push_back(Store); + } + + return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores); +} + +std::pair<SDValue, SDValue> +TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { + assert(LD->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed loads not implemented!"); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + EVT VT = LD->getValueType(0); + EVT LoadedVT = LD->getMemoryVT(); + SDLoc dl(LD); + auto &MF = DAG.getMachineFunction(); + + if (VT.isFloatingPoint() || VT.isVector()) { + EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); + if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) { + if (!isOperationLegalOrCustom(ISD::LOAD, intVT) && + LoadedVT.isVector()) { + // Scalarize the load and let the individual components be handled. + SDValue Scalarized = scalarizeVectorLoad(LD, DAG); + if (Scalarized->getOpcode() == ISD::MERGE_VALUES) + return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1)); + return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1)); + } + + // Expand to a (misaligned) integer load of the same size, + // then bitconvert to floating point or vector. + SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, + LD->getMemOperand()); + SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); + if (LoadedVT != VT) + Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : + ISD::ANY_EXTEND, dl, VT, Result); + + return std::make_pair(Result, newLoad.getValue(1)); + } + + // Copy the value to a (aligned) stack slot using (unaligned) integer + // loads and stores, then do a (aligned) load from the stack slot. 
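+ // (Concretely: for an unaligned 12-byte load with 4-byte registers, the
+ // loop below issues three unaligned integer loads, stores them to an
+ // aligned stack slot, and then reloads the whole value from that slot
+ // using the original extension type.)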
+ MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
+ unsigned LoadedBytes = LoadedVT.getStoreSize();
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+ auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+ EVT PtrVT = Ptr.getValueType();
+ EVT StackPtrVT = StackPtr.getValueType();
+
+ SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
+ SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
+
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(
+ RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
+ MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+ // Follow the load with a store to the stack slot. Remember the store.
+ Stores.push_back(DAG.getStore(
+ Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
+ // Increment the pointers.
+ Offset += RegBytes;
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
+ StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
+ }
+
+ // The last copy may be partial. Do an extending load.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (LoadedBytes - Offset));
+ SDValue Load =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset), MemVT,
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(
+ Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+ // Finally, perform the original load, only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
+ LoadedVT);
+
+ // Callers expect a MERGE_VALUES node.
+ return std::make_pair(Load, TF);
+ }
+
+ assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
+ "Unaligned load of unsupported type.");
+
+ // Compute the new VT that is half the size of the old one. This is an
+ // integer MVT.
+ unsigned NumBits = LoadedVT.getSizeInBits();
+ EVT NewLoadedVT;
+ NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
+ NumBits >>= 1;
+
+ unsigned Alignment = LD->getAlignment();
+ unsigned IncrementSize = NumBits / 8;
+ ISD::LoadExtType HiExtType = LD->getExtensionType();
+
+ // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
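+ // (Scalar sketch of this path, assuming a little-endian i32 load split
+ // into two i16 halves; the helpers are hypothetical:
+ //   uint32_t lo = zext_load_i16(p);     // low half must be zero-extended
+ //   uint32_t hi = ext_load_i16(p + 2);  // keeps the original ext kind
+ //   uint32_t result = (hi << 16) | lo;
+ // )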
+ if (HiExtType == ISD::NON_EXTLOAD) + HiExtType = ISD::ZEXTLOAD; + + // Load the value in two parts + SDValue Lo, Hi; + if (DAG.getDataLayout().isLittleEndian()) { + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), + LD->getAAInfo()); + + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, MinAlign(Alignment, IncrementSize), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + } else { + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), + LD->getAAInfo()); + + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, MinAlign(Alignment, IncrementSize), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + } + + // aggregate the two parts + SDValue ShiftAmount = + DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(), + DAG.getDataLayout())); + SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); + Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + return std::make_pair(Result, TF); +} + +SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, + SelectionDAG &DAG) const { + assert(ST->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed stores not implemented!"); + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + SDValue Val = ST->getValue(); + EVT VT = Val.getValueType(); + int Alignment = ST->getAlignment(); + auto &MF = DAG.getMachineFunction(); + EVT StoreMemVT = ST->getMemoryVT(); + + SDLoc dl(ST); + if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) { + EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + if (isTypeLegal(intVT)) { + if (!isOperationLegalOrCustom(ISD::STORE, intVT) && + StoreMemVT.isVector()) { + // Scalarize the store and let the individual components be handled. + SDValue Result = scalarizeVectorStore(ST, DAG); + return Result; + } + // Expand to a bitconvert of the value to the integer type of the + // same size, then a (misaligned) int store. + // FIXME: Does not handle truncating floating point stores! + SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); + Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + Alignment, ST->getMemOperand()->getFlags()); + return Result; + } + // Do a (aligned) store to a stack slot, then copy from the stack slot + // to the final destination using (unaligned) integer loads and stores. + MVT RegVT = getRegisterType( + *DAG.getContext(), + EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits())); + EVT PtrVT = Ptr.getValueType(); + unsigned StoredBytes = StoreMemVT.getStoreSize(); + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT); + auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + + // Perform the original store, only redirected to the stack slot. 
+ SDValue Store = DAG.getTruncStore(
+ Chain, dl, Val, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
+
+ EVT StackPtrVT = StackPtr.getValueType();
+
+ SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
+ SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
+ SmallVector<SDValue, 8> Stores;
+ unsigned Offset = 0;
+
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the stack slot.
+ SDValue Load = DAG.getLoad(
+ RegVT, dl, Store, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
+ // Store it to the final location. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ MinAlign(ST->getAlignment(), Offset),
+ ST->getMemOperand()->getFlags()));
+ // Increment the pointers.
+ Offset += RegBytes;
+ StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
+ }
+
+ // The last store may be partial. Do a truncating store. On big-endian
+ // machines this requires an extending load from the stack slot to ensure
+ // that the bits are in the right place.
+ EVT LoadMemVT =
+ EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
+
+ // Load from the stack slot.
+ SDValue Load = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
+
+ Stores.push_back(
+ DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
+ MinAlign(ST->getAlignment(), Offset),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo()));
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+ return Result;
+ }
+
+ assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
+ "Unaligned store of unknown type.");
+ // Get the half-size VT
+ EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
+ int NumBits = NewStoredVT.getSizeInBits();
+ int IncrementSize = NumBits / 8;
+
+ // Divide the stored value in two parts.
+ SDValue ShiftAmount = DAG.getConstant(
+ NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
+ SDValue Lo = Val;
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
+
+ // Store the two parts
+ SDValue Store1, Store2;
+ Store1 = DAG.getTruncStore(Chain, dl,
+ DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
+ Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
+ ST->getMemOperand()->getFlags());
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Alignment = MinAlign(Alignment, IncrementSize);
+ Store2 = DAG.getTruncStore(
+ Chain, dl, DAG.getDataLayout().isLittleEndian() ?
+SDValue
+TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
+                                       const SDLoc &DL, EVT DataVT,
+                                       SelectionDAG &DAG,
+                                       bool IsCompressedMemory) const {
+  SDValue Increment;
+  EVT AddrVT = Addr.getValueType();
+  EVT MaskVT = Mask.getValueType();
+  assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
+         "Incompatible types of Data and Mask");
+  if (IsCompressedMemory) {
+    // Increment the pointer according to the number of '1's in the mask.
+    EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
+    SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
+    if (MaskIntVT.getSizeInBits() < 32) {
+      MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
+      MaskIntVT = MVT::i32;
+    }
+
+    // Count '1's with POPCNT.
+    Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
+    Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
+    // Scale is an element size in bytes.
+    SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
+                                    AddrVT);
+    Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
+  } else
+    Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
+
+  return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
+}
+
+static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
+                                       SDValue Idx,
+                                       EVT VecVT,
+                                       const SDLoc &dl) {
+  if (isa<ConstantSDNode>(Idx))
+    return Idx;
+
+  EVT IdxVT = Idx.getValueType();
+  unsigned NElts = VecVT.getVectorNumElements();
+  if (isPowerOf2_32(NElts)) {
+    APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
+                                     Log2_32(NElts));
+    return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
+                       DAG.getConstant(Imm, dl, IdxVT));
+  }
+
+  return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
+                     DAG.getConstant(NElts - 1, dl, IdxVT));
+}
+
+SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
+                                                SDValue VecPtr, EVT VecVT,
+                                                SDValue Index) const {
+  SDLoc dl(Index);
+  // Make sure the index type is big enough to compute in.
+  Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
+
+  EVT EltVT = VecVT.getVectorElementType();
+
+  // Calculate the element offset and add it to the pointer.
+  unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
+  assert(EltSize * 8 == EltVT.getSizeInBits() &&
+         "Converting bits to bytes lost precision");
+
+  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
+
+  EVT IdxVT = Index.getValueType();
+
+  Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
+                      DAG.getConstant(EltSize, dl, IdxVT));
+  return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index);
+}
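clampDynamicVectorIndex implements a rule that is easy to state in scalar
form: wrap with an AND when the element count is a power of two, otherwise
clamp with an unsigned min. A sketch with illustrative names:

    #include <algorithm>
    #include <cstdint>

    uint64_t clampIndex(uint64_t Idx, uint64_t NElts) {
      if ((NElts & (NElts - 1)) == 0)  // power of two
        return Idx & (NElts - 1);      // the ISD::AND path
      return std::min(Idx, NElts - 1); // the ISD::UMIN path
    }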
+//===----------------------------------------------------------------------===//
+// Implementation of Emulated TLS Model
+//===----------------------------------------------------------------------===//
+
+SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
+                                                SelectionDAG &DAG) const {
+  // Access to the address of TLS variable xyz is lowered to a function call:
+  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
+  SDLoc dl(GA);
+
+  ArgListTy Args;
+  ArgListEntry Entry;
+  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
+  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
+  StringRef EmuTlsVarName(NameString);
+  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
+  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
+  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
+  Entry.Ty = VoidPtrType;
+  Args.push_back(Entry);
+
+  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
+
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
+  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
+  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+  // TLSADDR will be codegen'ed as a call. Inform MFI that the function has
+  // calls. At least for X86 targets; maybe good for other targets too?
+  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+  MFI.setAdjustsStack(true); // Is this only for X86 target?
+  MFI.setHasCalls(true);
+
+  assert((GA->getOffset() == 0) &&
+         "Emulated TLS must have zero offset in GlobalAddressSDNode");
+  return CallResult.first;
+}
+
+SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
+  if (!isCtlzFast())
+    return SDValue();
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+  SDLoc dl(Op);
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+    if (C->isNullValue() && CC == ISD::SETEQ) {
+      EVT VT = Op.getOperand(0).getValueType();
+      SDValue Zext = Op.getOperand(0);
+      if (VT.bitsLT(MVT::i32)) {
+        VT = MVT::i32;
+        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
+      }
+      unsigned Log2b = Log2_32(VT.getSizeInBits());
+      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+                                DAG.getConstant(Log2b, dl, MVT::i32));
+      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
+    }
+  }
+  return SDValue();
+}
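The CTLZ/SRL trick above rests on one identity: for a W-bit value (W a power
of two), ctlz(x) == W exactly when x == 0, and only then is bit log2(W) of
the count set. A portable scalar check of the idea, with illustrative names;
the loop mimics ISD::CTLZ, which is defined to return W for zero:

    #include <cstdint>

    uint32_t ctlz32(uint32_t X) {
      uint32_t N = 0;
      for (uint32_t Bit = 1u << 31; Bit && !(X & Bit); Bit >>= 1)
        ++N;
      return N; // 32 for X == 0
    }

    uint32_t isZero32(uint32_t X) {
      return ctlz32(X) >> 5; // 1 iff ctlz == 32, i.e. iff X == 0
    }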
+SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
+  unsigned Opcode = Node->getOpcode();
+  SDValue LHS = Node->getOperand(0);
+  SDValue RHS = Node->getOperand(1);
+  EVT VT = LHS.getValueType();
+  SDLoc dl(Node);
+
+  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
+  assert(VT.isInteger() && "Expected operands to be integers");
+
+  // usub.sat(a, b) -> umax(a, b) - b
+  if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
+    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
+    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
+  }
+
+  // uadd.sat(a, b) -> umin(a, ~b) + b
+  if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) {
+    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
+    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
+    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
+  }
+
+  unsigned OverflowOp;
+  switch (Opcode) {
+  case ISD::SADDSAT:
+    OverflowOp = ISD::SADDO;
+    break;
+  case ISD::UADDSAT:
+    OverflowOp = ISD::UADDO;
+    break;
+  case ISD::SSUBSAT:
+    OverflowOp = ISD::SSUBO;
+    break;
+  case ISD::USUBSAT:
+    OverflowOp = ISD::USUBO;
+    break;
+  default:
+    llvm_unreachable("Expected method to receive signed or unsigned saturation "
+                     "addition or subtraction node.");
+  }
+
+  unsigned BitWidth = LHS.getScalarValueSizeInBits();
+  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
+                               LHS, RHS);
+  SDValue SumDiff = Result.getValue(0);
+  SDValue Overflow = Result.getValue(1);
+  SDValue Zero = DAG.getConstant(0, dl, VT);
+  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
+
+  if (Opcode == ISD::UADDSAT) {
+    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+      // (LHS + RHS) | OverflowMask
+      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
+    }
+    // Overflow ? 0xffff.... : (LHS + RHS)
+    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
+  } else if (Opcode == ISD::USUBSAT) {
+    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+      // (LHS - RHS) & ~OverflowMask
+      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
+      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
+    }
+    // Overflow ? 0 : (LHS - RHS)
+    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
+  } else {
+    // SatMax -> Overflow && SumDiff < 0
+    // SatMin -> Overflow && SumDiff >= 0
+    APInt MinVal = APInt::getSignedMinValue(BitWidth);
+    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
+    SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+    SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+    SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
+    Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
+    return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
+  }
+}
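Both branch-free tricks at the top of this function have direct scalar
analogues. A sketch on uint8_t (illustrative names, not part of the patch):

    #include <algorithm>
    #include <cstdint>

    uint8_t usub_sat(uint8_t A, uint8_t B) {
      return std::max(A, B) - B;           // umax(a, b) - b
    }

    uint8_t uadd_sat(uint8_t A, uint8_t B) {
      // umin(a, ~b) + b: clamp A to 255 - B, then the add cannot wrap.
      return std::min(A, uint8_t(~B)) + B;
    }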
+SDValue
+TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
+  assert((Node->getOpcode() == ISD::SMULFIX ||
+          Node->getOpcode() == ISD::UMULFIX ||
+          Node->getOpcode() == ISD::SMULFIXSAT ||
+          Node->getOpcode() == ISD::UMULFIXSAT) &&
+         "Expected a fixed point multiplication opcode");
+
+  SDLoc dl(Node);
+  SDValue LHS = Node->getOperand(0);
+  SDValue RHS = Node->getOperand(1);
+  EVT VT = LHS.getValueType();
+  unsigned Scale = Node->getConstantOperandVal(2);
+  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
+                     Node->getOpcode() == ISD::UMULFIXSAT);
+  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
+                 Node->getOpcode() == ISD::SMULFIXSAT);
+  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  unsigned VTSize = VT.getScalarSizeInBits();
+
+  if (!Scale) {
+    // [us]mul.fix(a, b, 0) -> mul(a, b)
+    if (!Saturating) {
+      if (isOperationLegalOrCustom(ISD::MUL, VT))
+        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
+      SDValue Result =
+          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+      SDValue Product = Result.getValue(0);
+      SDValue Overflow = Result.getValue(1);
+      SDValue Zero = DAG.getConstant(0, dl, VT);
+
+      APInt MinVal = APInt::getSignedMinValue(VTSize);
+      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
+      Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+      return DAG.getSelect(dl, VT, Overflow, Result, Product);
+    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
+      SDValue Result =
+          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+      SDValue Product = Result.getValue(0);
+      SDValue Overflow = Result.getValue(1);
+
+      APInt MaxVal = APInt::getMaxValue(VTSize);
+      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
+    }
+  }
+
+  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
+         "Expected scale to be less than the number of bits if signed or at "
+         "most the number of bits if unsigned.");
+  assert(LHS.getValueType() == RHS.getValueType() &&
+         "Expected both operands to be the same type");
+
+  // Get the upper and lower bits of the result.
+  SDValue Lo, Hi;
+  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
+  if (isOperationLegalOrCustom(LoHiOp, VT)) {
+    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
+    Lo = Result.getValue(0);
+    Hi = Result.getValue(1);
+  } else if (isOperationLegalOrCustom(HiOp, VT)) {
+    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
+  } else if (VT.isVector()) {
+    return SDValue();
+  } else {
+    report_fatal_error("Unable to expand fixed point multiplication.");
+  }
+
+  if (Scale == VTSize)
+    // Result is just the top half since we'd be shifting by the width of the
+    // operand. Overflow impossible so this works for both UMULFIX and
+    // UMULFIXSAT.
+    return Hi;
+
+  // The result will need to be shifted right by the scale since both operands
+  // are scaled. The result is given to us in 2 halves, so we only want part of
+  // both in the result.
+  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
+  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
+                               DAG.getConstant(Scale, dl, ShiftTy));
+  if (!Saturating)
+    return Result;
+
+  if (!Signed) {
+    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
+    // widened multiplication) aren't all zeroes.
+
+    // Saturate to max if ((Hi >> Scale) != 0),
+    // which is the same as if (Hi > ((1 << Scale) - 1)).
+    APInt MaxVal = APInt::getMaxValue(VTSize);
+    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
+                                      dl, VT);
+    Result = DAG.getSelectCC(dl, Hi, LowMask,
+                             DAG.getConstant(MaxVal, dl, VT), Result,
+                             ISD::SETUGT);
+
+    return Result;
+  }
+
+  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
+  // widened multiplication) aren't all ones or all zeroes.
+
+  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
+  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
+
+  if (Scale == 0) {
+    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
+                               DAG.getConstant(VTSize - 1, dl, ShiftTy));
+    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
+    // Saturate to SatMin if the wide product is negative, and to SatMax if
+    // the wide product is positive ...
+    SDValue Zero = DAG.getConstant(0, dl, VT);
+    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
+                                               ISD::SETLT);
+    // ... but only if we overflowed.
+    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
+  }
+
+  // We handled Scale==0 above so all the bits to examine are in Hi.
+
+  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
+  // which is the same as if (Hi > (1 << (Scale - 1)) - 1).
+  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
+                                    dl, VT);
+  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
+  // Saturate to min if ((Hi >> (Scale - 1)) < -1),
+  // which is the same as if (Hi < (-1 << (Scale - 1))).
+  SDValue HighMask =
+      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
+                      dl, VT);
+  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
+  return Result;
+}
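The FSHR step is easier to see on a concrete scalar case. For 32-bit operands
and an illustrative Scale of 16 (Q16.16 fixed point), the expansion above
selects bits [16, 48) of the 64-bit product. A sketch with names that are not
from the patch, assuming C++20 narrowing semantics:

    #include <cstdint>

    int32_t smul_fix_16(int32_t A, int32_t B) {
      int64_t Wide = int64_t(A) * int64_t(B);   // SMUL_LOHI: Hi:Lo = Wide
      uint32_t Lo = uint32_t(uint64_t(Wide));
      uint32_t Hi = uint32_t(uint64_t(Wide) >> 32);
      return int32_t((Hi << 16) | (Lo >> 16));  // FSHR(Hi, Lo, 16)
    }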
+void TargetLowering::expandUADDSUBO(
+    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
+  SDLoc dl(Node);
+  SDValue LHS = Node->getOperand(0);
+  SDValue RHS = Node->getOperand(1);
+  bool IsAdd = Node->getOpcode() == ISD::UADDO;
+
+  // If ADD/SUBCARRY is legal, use that instead.
+  unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
+  if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
+    SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
+    SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
+                                    { LHS, RHS, CarryIn });
+    Result = SDValue(NodeCarry.getNode(), 0);
+    Overflow = SDValue(NodeCarry.getNode(), 1);
+    return;
+  }
+
+  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
+                       LHS.getValueType(), LHS, RHS);
+
+  EVT ResultType = Node->getValueType(1);
+  EVT SetCCType = getSetCCResultType(
+      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
+  // An unsigned add wrapped iff Result < LHS; an unsigned sub wrapped iff
+  // Result > LHS.
+  ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
+  SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
+  Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
+}
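The fallback SETCC conditions encode the classic wrap checks. In scalar form,
with illustrative names:

    #include <cstdint>

    bool uaddo(uint32_t A, uint32_t B, uint32_t &R) {
      R = A + B;    // wraps modulo 2^32
      return R < A; // SETULT: an unsigned add overflowed iff Result < LHS
    }

    bool usubo(uint32_t A, uint32_t B, uint32_t &R) {
      R = A - B;
      return R > A; // SETUGT: an unsigned sub overflowed iff Result > LHS
    }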
+void TargetLowering::expandSADDSUBO(
+    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
+  SDLoc dl(Node);
+  SDValue LHS = Node->getOperand(0);
+  SDValue RHS = Node->getOperand(1);
+  bool IsAdd = Node->getOpcode() == ISD::SADDO;
+
+  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
+                       LHS.getValueType(), LHS, RHS);
+
+  EVT ResultType = Node->getValueType(1);
+  EVT OType = getSetCCResultType(
+      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
+
+  // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
+  unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
+  if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
+    SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
+    SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
+    Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
+    return;
+  }
+
+  SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
+
+  // For an addition, the result should be less than one of the operands (LHS)
+  // if and only if the other operand (RHS) is negative, otherwise there will
+  // be overflow.
+  // For a subtraction, the result should be less than one of the operands
+  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
+  // otherwise there will be overflow.
+  SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
+  SDValue ConditionRHS =
+      DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
+
+  Overflow = DAG.getBoolExtOrTrunc(
+      DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
+      ResultType, ResultType);
+}
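The XOR of the two conditions is the whole overflow test: adding a negative
RHS must lower the result, so any disagreement means the sign wrapped. A
scalar model with illustrative names; the add is done in unsigned arithmetic
to keep the C++ well-defined, and the usual two's-complement narrowing is
assumed:

    #include <cstdint>

    bool saddo(int32_t A, int32_t B, int32_t &R) {
      R = int32_t(uint32_t(A) + uint32_t(B)); // wrapping ISD::ADD
      bool ResultLowerThanLHS = R < A;
      bool RHSIsNegative = B < 0;
      return RHSIsNegative != ResultLowerThanLHS; // the ISD::XOR above
    }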
+bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
+                                SDValue &Overflow, SelectionDAG &DAG) const {
+  SDLoc dl(Node);
+  EVT VT = Node->getValueType(0);
+  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  SDValue LHS = Node->getOperand(0);
+  SDValue RHS = Node->getOperand(1);
+  bool isSigned = Node->getOpcode() == ISD::SMULO;
+
+  // For power-of-two multiplications we can use a simpler shift expansion.
+  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
+    const APInt &C = RHSC->getAPIntValue();
+    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
+    if (C.isPowerOf2()) {
+      // smulo(x, signed_min) is the same as umulo(x, signed_min).
+      bool UseArithShift = isSigned && !C.isMinSignedValue();
+      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
+      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
+      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
+      Overflow = DAG.getSetCC(dl, SetCCVT,
+                              DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
+                                          dl, VT, Result, ShiftAmt),
+                              LHS, ISD::SETNE);
+      return true;
+    }
+  }
+
+  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
+  if (VT.isVector())
+    WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
+                              VT.getVectorNumElements());
+
+  SDValue BottomHalf;
+  SDValue TopHalf;
+  static const unsigned Ops[2][3] =
+      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
+  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+                             RHS);
+    TopHalf = BottomHalf.getValue(1);
+  } else if (isTypeLegal(WideVT)) {
+    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
+    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
+        getShiftAmountTy(WideVT, DAG.getDataLayout()));
+    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
+                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
+  } else {
+    if (VT.isVector())
+      return false;
+
+    // We can fall back to a libcall with an illegal type for the MUL if we
+    // have a libcall big enough.
+    // Also, we can fall back to a division in some cases, but that's a big
+    // performance hit in the general case.
+    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+    if (WideVT == MVT::i16)
+      LC = RTLIB::MUL_I16;
+    else if (WideVT == MVT::i32)
+      LC = RTLIB::MUL_I32;
+    else if (WideVT == MVT::i64)
+      LC = RTLIB::MUL_I64;
+    else if (WideVT == MVT::i128)
+      LC = RTLIB::MUL_I128;
+    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+
+    SDValue HiLHS;
+    SDValue HiRHS;
+    if (isSigned) {
+      // The high part is obtained by SRA'ing all but one of the bits of the
+      // low part.
+      unsigned LoSize = VT.getSizeInBits();
+      HiLHS =
+          DAG.getNode(ISD::SRA, dl, VT, LHS,
+                      DAG.getConstant(LoSize - 1, dl,
+                                      getPointerTy(DAG.getDataLayout())));
+      HiRHS =
+          DAG.getNode(ISD::SRA, dl, VT, RHS,
+                      DAG.getConstant(LoSize - 1, dl,
+                                      getPointerTy(DAG.getDataLayout())));
+    } else {
+      HiLHS = DAG.getConstant(0, dl, VT);
+      HiRHS = DAG.getConstant(0, dl, VT);
+    }
+
+    // Here we're passing the 2 arguments explicitly as 4 arguments that are
+    // pre-lowered to the correct types. This all depends upon WideVT not
+    // being a legal type for the architecture and thus has to be split to
+    // two arguments.
+    SDValue Ret;
+    TargetLowering::MakeLibCallOptions CallOptions;
+    CallOptions.setSExt(isSigned);
+    CallOptions.setIsPostTypeLegalization(true);
+    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
+      // Halves of WideVT are packed into registers in different order
+      // depending on platform endianness. This is usually handled by
+      // the C calling convention, but we can't defer to it in
+      // the legalizer.
+      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
+    } else {
+      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
+      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
+    }
+    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
+           "Ret value is a collection of constituent nodes holding result.");
+    if (DAG.getDataLayout().isLittleEndian()) {
+      // Same as above.
+      BottomHalf = Ret.getOperand(0);
+      TopHalf = Ret.getOperand(1);
+    } else {
+      BottomHalf = Ret.getOperand(1);
+      TopHalf = Ret.getOperand(0);
+    }
+  }
+
+  Result = BottomHalf;
+  if (isSigned) {
+    SDValue ShiftAmt = DAG.getConstant(
+        VT.getScalarSizeInBits() - 1, dl,
+        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
+    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
+    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
+  } else {
+    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
+                            DAG.getConstant(0, dl, VT), ISD::SETNE);
+  }
+
+  // Truncate the result if SetCC returns a larger type than needed.
+  EVT RType = Node->getValueType(1);
+  if (RType.getSizeInBits() < Overflow.getValueSizeInBits())
+    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
+
+  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
+         "Unexpected result type for S/UMULO legalization");
+  return true;
+}
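The widening branch is the easiest to model in scalar code: multiply at twice
the width, then the top half must be the sign bits of the bottom half
(signed) or zero (unsigned). A sketch for 32-bit operands, with illustrative
names; two's-complement narrowing and arithmetic right shift are assumed, as
guaranteed from C++20:

    #include <cstdint>

    bool smulo32(int32_t A, int32_t B, int32_t &R) {
      int64_t Wide = int64_t(A) * int64_t(B);
      R = int32_t(Wide);                 // BottomHalf
      int32_t Top = int32_t(Wide >> 32); // TopHalf
      return Top != (R >> 31);           // TopHalf != sign of BottomHalf
    }

    bool umulo32(uint32_t A, uint32_t B, uint32_t &R) {
      uint64_t Wide = uint64_t(A) * uint64_t(B);
      R = uint32_t(Wide);
      return (Wide >> 32) != 0;          // any high bit set => overflow
    }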
+SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
+  SDLoc dl(Node);
+  bool NoNaN = Node->getFlags().hasNoNaNs();
+  unsigned BaseOpcode = 0;
+  switch (Node->getOpcode()) {
+  default: llvm_unreachable("Expected VECREDUCE opcode");
+  case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
+  case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
+  case ISD::VECREDUCE_ADD:  BaseOpcode = ISD::ADD; break;
+  case ISD::VECREDUCE_MUL:  BaseOpcode = ISD::MUL; break;
+  case ISD::VECREDUCE_AND:  BaseOpcode = ISD::AND; break;
+  case ISD::VECREDUCE_OR:   BaseOpcode = ISD::OR; break;
+  case ISD::VECREDUCE_XOR:  BaseOpcode = ISD::XOR; break;
+  case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
+  case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
+  case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
+  case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
+  case ISD::VECREDUCE_FMAX:
+    BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
+    break;
+  case ISD::VECREDUCE_FMIN:
+    BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
+    break;
+  }
+
+  SDValue Op = Node->getOperand(0);
+  EVT VT = Op.getValueType();
+
+  // Try to use a shuffle reduction for power-of-two vectors.
+  if (VT.isPow2VectorType()) {
+    while (VT.getVectorNumElements() > 1) {
+      EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
+      if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
+        break;
+
+      SDValue Lo, Hi;
+      std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
+      Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
+      VT = HalfVT;
+    }
+  }
+
+  EVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  SmallVector<SDValue, 8> Ops;
+  DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
+
+  SDValue Res = Ops[0];
+  for (unsigned i = 1; i < NumElts; i++)
+    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
+
+  // Result type may be wider than element type.
+  if (EltVT != Node->getValueType(0))
+    Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
+  return Res;
+}
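The shuffle-reduction loop halves the vector until one element is left:
log2(N) vector ops instead of N - 1 scalar ones. The same ladder on a plain
array (a sketch; names are illustrative, N must be a power of two, and the
input is reduced in place):

    #include <cstddef>
    #include <cstdint>

    uint32_t reduceAdd(uint32_t *V, size_t N) {
      for (size_t Half = N / 2; Half > 0; Half /= 2)
        for (size_t i = 0; i < Half; ++i)
          V[i] += V[i + Half]; // combine the Lo and Hi halves
      return V[0];
    }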
